1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "pixblockdsp.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60
61 static int encode_picture(MpegEncContext *s, int picture_number);
62 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
63 static int sse_mb(MpegEncContext *s);
64 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
65 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
66
67 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
68 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
69
70 const AVOption ff_mpv_generic_options[] = {
71     FF_MPV_COMMON_OPTS
72     { NULL },
73 };
74
75 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
76                        uint16_t (*qmat16)[2][64],
77                        const uint16_t *quant_matrix,
78                        int bias, int qmin, int qmax, int intra)
79 {
80     FDCTDSPContext *fdsp = &s->fdsp;
81     int qscale;
82     int shift = 0;
83
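    /* For each qscale, build reciprocal tables so that quantization can be
     * done with a multiply and shift instead of a division, roughly:
     *     level = (coeff * qmat[qscale][i]) >> QMAT_SHIFT;
     * qmat16 holds a 16-bit variant plus a precomputed rounding bias,
     * presumably for the SIMD quantizers (note QMAT_SHIFT_MMX). */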
84     for (qscale = qmin; qscale <= qmax; qscale++) {
85         int i;
86         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
87             fdsp->fdct == ff_jpeg_fdct_islow_10 ||
88             fdsp->fdct == ff_faandct) {
89             for (i = 0; i < 64; i++) {
90                 const int j = s->idsp.idct_permutation[i];
91                 /* 16 <= qscale * quant_matrix[i] <= 7905
92                  * Assume x = qscale * quant_matrix[i]
93                  * So             16 <=              x  <= 7905
94                  * so (1 << QMAT_SHIFT) / 16 >= (1 << QMAT_SHIFT) / (x)
95                  *                           >= (1 << QMAT_SHIFT) / 7905 */
96
97                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
98                                         (qscale * quant_matrix[j]));
99             }
100         } else if (fdsp->fdct == ff_fdct_ifast) {
101             for (i = 0; i < 64; i++) {
102                 const int j = s->idsp.idct_permutation[i];
103                 /* 16 <= qscale * quant_matrix[i] <= 7905
104                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
105                  *             19952 <=              x  <= 249205026
106                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
107                  *           3444240 >= (1 << 36) / (x) >= 275 */
108
109                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
110                                         (ff_aanscales[i] * qscale *
111                                          quant_matrix[j]));
112             }
113         } else {
114             for (i = 0; i < 64; i++) {
115                 const int j = s->idsp.idct_permutation[i];
116                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
117                  * Assume x = qscale * quant_matrix[i]
118                  * So             16 <=              x  <= 7905
119                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
120                  * so          32768 >= (1 << 19) / (x) >= 67 */
121                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
122                                         (qscale * quant_matrix[j]));
123                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
124                 //                    (qscale * quant_matrix[i]);
125                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
126                                        (qscale * quant_matrix[j]);
127
128                 if (qmat16[qscale][0][i] == 0 ||
129                     qmat16[qscale][0][i] == 128 * 256)
130                     qmat16[qscale][0][i] = 128 * 256 - 1;
131                 qmat16[qscale][1][i] =
132                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
133                                 qmat16[qscale][0][i]);
134             }
135         }
136
137         for (i = intra; i < 64; i++) {
138             int64_t max = 8191;
139             if (fdsp->fdct == ff_fdct_ifast) {
140                 max = (8191LL * ff_aanscales[i]) >> 14;
141             }
142             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
143                 shift++;
144             }
145         }
146     }
147     if (shift) {
148         av_log(NULL, AV_LOG_INFO,
149                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
150                QMAT_SHIFT - shift);
151     }
152 }
153
154 static inline void update_qscale(MpegEncContext *s)
155 {
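    /* Map the rate-distortion lambda to a quantizer: the 139 / (1 << 14)
     * factor works out to roughly 1 / FF_QP2LAMBDA, i.e. qscale is about
     * lambda / 118 with rounding. The result is clipped to the user's
     * qmin/qmax and lambda^2 is cached for macroblock-level RD decisions. */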
156     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
157                 (FF_LAMBDA_SHIFT + 7);
158     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
159
160     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
161                  FF_LAMBDA_SHIFT;
162 }
163
164 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
165 {
166     int i;
167
168     if (matrix) {
169         put_bits(pb, 1, 1);
170         for (i = 0; i < 64; i++) {
171             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
172         }
173     } else
174         put_bits(pb, 1, 0);
175 }
176
177 /**
178  * init s->current_picture.qscale_table from s->lambda_table
179  */
180 void ff_init_qscale_tab(MpegEncContext *s)
181 {
182     int8_t * const qscale_table = s->current_picture.qscale_table;
183     int i;
184
185     for (i = 0; i < s->mb_num; i++) {
186         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
187         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
188         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
189                                                   s->avctx->qmax);
190     }
191 }
192
193 static void update_duplicate_context_after_me(MpegEncContext *dst,
194                                               MpegEncContext *src)
195 {
196 #define COPY(a) dst->a= src->a
197     COPY(pict_type);
198     COPY(current_picture);
199     COPY(f_code);
200     COPY(b_code);
201     COPY(qscale);
202     COPY(lambda);
203     COPY(lambda2);
204     COPY(picture_in_gop_number);
205     COPY(gop_picture_number);
206     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
207     COPY(progressive_frame);    // FIXME don't set in encode_header
208     COPY(partitioned_frame);    // FIXME don't set in encode_header
209 #undef COPY
210 }
211
212 /**
213  * Set the given MpegEncContext to defaults for encoding.
214  * The changed fields will not depend upon the prior state of the MpegEncContext.
215  */
216 static void MPV_encode_defaults(MpegEncContext *s)
217 {
218     int i;
219     ff_MPV_common_defaults(s);
220
221     for (i = -16; i < 16; i++) {
222         default_fcode_tab[i + MAX_MV] = 1;
223     }
224     s->me.mv_penalty = default_mv_penalty;
225     s->fcode_tab     = default_fcode_tab;
226
227     s->input_picture_number  = 0;
228     s->picture_in_gop_number = 0;
229 }
230
231 /* init video encoder */
232 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
233 {
234     MpegEncContext *s = avctx->priv_data;
235     int i, ret, format_supported;
236
237     MPV_encode_defaults(s);
238
239     switch (avctx->codec_id) {
240     case AV_CODEC_ID_MPEG2VIDEO:
241         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
242             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
243             av_log(avctx, AV_LOG_ERROR,
244                    "only YUV420 and YUV422 are supported\n");
245             return -1;
246         }
247         break;
248     case AV_CODEC_ID_MJPEG:
249         format_supported = 0;
250         /* JPEG color space */
251         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
252             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
253             (avctx->color_range == AVCOL_RANGE_JPEG &&
254              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
255               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
256             format_supported = 1;
257         /* MPEG color space */
258         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
259                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
260                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
261             format_supported = 1;
262
263         if (!format_supported) {
264             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
265             return -1;
266         }
267         break;
268     default:
269         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
270             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
271             return -1;
272         }
273     }
274
275     switch (avctx->pix_fmt) {
276     case AV_PIX_FMT_YUVJ422P:
277     case AV_PIX_FMT_YUV422P:
278         s->chroma_format = CHROMA_422;
279         break;
280     case AV_PIX_FMT_YUVJ420P:
281     case AV_PIX_FMT_YUV420P:
282     default:
283         s->chroma_format = CHROMA_420;
284         break;
285     }
286
287     s->bit_rate = avctx->bit_rate;
288     s->width    = avctx->width;
289     s->height   = avctx->height;
290     if (avctx->gop_size > 600 &&
291         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
292         av_log(avctx, AV_LOG_ERROR,
293                "Warning, keyframe interval too large, reducing it ...\n");
294         avctx->gop_size = 600;
295     }
296     s->gop_size     = avctx->gop_size;
297     s->avctx        = avctx;
298     s->flags        = avctx->flags;
299     s->flags2       = avctx->flags2;
300     if (avctx->max_b_frames > MAX_B_FRAMES) {
301         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
302                "is %d.\n", MAX_B_FRAMES);
303     }
304     s->max_b_frames = avctx->max_b_frames;
305     s->codec_id     = avctx->codec->id;
306     s->strict_std_compliance = avctx->strict_std_compliance;
307     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
308     s->mpeg_quant         = avctx->mpeg_quant;
309     s->rtp_mode           = !!avctx->rtp_payload_size;
310     s->intra_dc_precision = avctx->intra_dc_precision;
311     s->user_specified_pts = AV_NOPTS_VALUE;
312
313     if (s->gop_size <= 1) {
314         s->intra_only = 1;
315         s->gop_size   = 12;
316     } else {
317         s->intra_only = 0;
318     }
319
320     s->me_method = avctx->me_method;
321
322     /* Fixed QSCALE */
323     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
324
325     s->adaptive_quant = (s->avctx->lumi_masking ||
326                          s->avctx->dark_masking ||
327                          s->avctx->temporal_cplx_masking ||
328                          s->avctx->spatial_cplx_masking  ||
329                          s->avctx->p_masking      ||
330                          s->avctx->border_masking ||
331                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
332                         !s->fixed_qscale;
333
334     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
335
336     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
337         av_log(avctx, AV_LOG_ERROR,
338                "a vbv buffer size is needed "
339                "for encoding with a maximum bitrate\n");
340         return -1;
341     }
342
343     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
344         av_log(avctx, AV_LOG_INFO,
345                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
346     }
347
348     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
349         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
350         return -1;
351     }
352
353     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
354         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
355         return -1;
356     }
357
358     if (avctx->rc_max_rate &&
359         avctx->rc_max_rate == avctx->bit_rate &&
360         avctx->rc_max_rate != avctx->rc_min_rate) {
361         av_log(avctx, AV_LOG_INFO,
362                "impossible bitrate constraints, this will fail\n");
363     }
364
365     if (avctx->rc_buffer_size &&
366         avctx->bit_rate * (int64_t)avctx->time_base.num >
367             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
368         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
369         return -1;
370     }
371
372     if (!s->fixed_qscale &&
373         avctx->bit_rate * av_q2d(avctx->time_base) >
374             avctx->bit_rate_tolerance) {
375         av_log(avctx, AV_LOG_ERROR,
376                "bitrate tolerance too small for bitrate\n");
377         return -1;
378     }
379
380     if (s->avctx->rc_max_rate &&
381         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
382         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
383          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
384         90000LL * (avctx->rc_buffer_size - 1) >
385             s->avctx->rc_max_rate * 0xFFFFLL) {
386         av_log(avctx, AV_LOG_INFO,
387                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
388                "specified vbv buffer is too large for the given bitrate!\n");
389     }
390
391     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
392         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
393         s->codec_id != AV_CODEC_ID_FLV1) {
394         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
395         return -1;
396     }
397
398     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
399         av_log(avctx, AV_LOG_ERROR,
400                "OBMC is only supported with simple mb decision\n");
401         return -1;
402     }
403
404     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
405         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
406         return -1;
407     }
408
409     if (s->max_b_frames                    &&
410         s->codec_id != AV_CODEC_ID_MPEG4      &&
411         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
412         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
413         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
414         return -1;
415     }
416
417     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
418          s->codec_id == AV_CODEC_ID_H263  ||
419          s->codec_id == AV_CODEC_ID_H263P) &&
420         (avctx->sample_aspect_ratio.num > 255 ||
421          avctx->sample_aspect_ratio.den > 255)) {
422         av_log(avctx, AV_LOG_ERROR,
423                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
424                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
425         return -1;
426     }
427
428     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
429         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
430         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
431         return -1;
432     }
433
434     // FIXME mpeg2 uses that too
435     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
436         av_log(avctx, AV_LOG_ERROR,
437                "mpeg2 style quantization not supported by codec\n");
438         return -1;
439     }
440
441     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
442         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
443         return -1;
444     }
445
446     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
447         s->avctx->mb_decision != FF_MB_DECISION_RD) {
448         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
449         return -1;
450     }
451
452     if (s->avctx->scenechange_threshold < 1000000000 &&
453         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
454         av_log(avctx, AV_LOG_ERROR,
455                "closed gop with scene change detection is not supported yet, "
456                "set threshold to 1000000000\n");
457         return -1;
458     }
459
460     if (s->flags & CODEC_FLAG_LOW_DELAY) {
461         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
462             av_log(avctx, AV_LOG_ERROR,
463                   "low delay forcing is only available for mpeg2\n");
464             return -1;
465         }
466         if (s->max_b_frames != 0) {
467             av_log(avctx, AV_LOG_ERROR,
468                    "b frames cannot be used with low delay\n");
469             return -1;
470         }
471     }
472
473     if (s->q_scale_type == 1) {
474         if (avctx->qmax > 12) {
475             av_log(avctx, AV_LOG_ERROR,
476                    "non linear quant only supports qmax <= 12 currently\n");
477             return -1;
478         }
479     }
480
481     if (s->avctx->thread_count > 1         &&
482         s->codec_id != AV_CODEC_ID_MPEG4      &&
483         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
484         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
485         (s->codec_id != AV_CODEC_ID_H263P)) {
486         av_log(avctx, AV_LOG_ERROR,
487                "multi threaded encoding not supported by codec\n");
488         return -1;
489     }
490
491     if (s->avctx->thread_count < 1) {
492         av_log(avctx, AV_LOG_ERROR,
493                "automatic thread number detection not supported by codec, "
494                "patch welcome\n");
495         return -1;
496     }
497
498     if (s->avctx->thread_count > 1)
499         s->rtp_mode = 1;
500
501     if (!avctx->time_base.den || !avctx->time_base.num) {
502         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
503         return -1;
504     }
505
506     i = (INT_MAX / 2 + 128) >> 8;
507     if (avctx->mb_threshold >= i) {
508         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
509                i - 1);
510         return -1;
511     }
512
513     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
514         av_log(avctx, AV_LOG_INFO,
515                "notice: b_frame_strategy only affects the first pass\n");
516         avctx->b_frame_strategy = 0;
517     }
518
519     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
520     if (i > 1) {
521         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
522         avctx->time_base.den /= i;
523         avctx->time_base.num /= i;
524         //return -1;
525     }
526
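    /* Default quantizer rounding biases, in QUANT_BIAS_SHIFT fixed point:
     * MPEG-1/2 and MJPEG style coding rounds intra coefficients up by 3/8 of
     * a step, while the other codecs round inter coefficients down by 1/4,
     * which presumably favours zero coefficients and cheaper blocks. */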
527     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
528         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
529         // (a + x * 3 / 8) / x
530         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
531         s->inter_quant_bias = 0;
532     } else {
533         s->intra_quant_bias = 0;
534         // (a - x / 4) / x
535         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
536     }
537
538     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
539         s->intra_quant_bias = avctx->intra_quant_bias;
540     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
541         s->inter_quant_bias = avctx->inter_quant_bias;
542
543     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
544         s->avctx->time_base.den > (1 << 16) - 1) {
545         av_log(avctx, AV_LOG_ERROR,
546                "timebase %d/%d not supported by MPEG 4 standard, "
547                "the maximum admitted value for the timebase denominator "
548                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
549                (1 << 16) - 1);
550         return -1;
551     }
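    /* Number of bits needed to code [0, time_base.den - 1]; MPEG-4 writes the
     * fractional part of each timestamp with exactly this many bits. */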
552     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
553
554     switch (avctx->codec->id) {
555     case AV_CODEC_ID_MPEG1VIDEO:
556         s->out_format = FMT_MPEG1;
557         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
558         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
559         break;
560     case AV_CODEC_ID_MPEG2VIDEO:
561         s->out_format = FMT_MPEG1;
562         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
563         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
564         s->rtp_mode   = 1;
565         break;
566     case AV_CODEC_ID_MJPEG:
567         s->out_format = FMT_MJPEG;
568         s->intra_only = 1; /* force intra only for jpeg */
569         if (!CONFIG_MJPEG_ENCODER ||
570             ff_mjpeg_encode_init(s) < 0)
571             return -1;
572         avctx->delay = 0;
573         s->low_delay = 1;
574         break;
575     case AV_CODEC_ID_H261:
576         if (!CONFIG_H261_ENCODER)
577             return -1;
578         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
579             av_log(avctx, AV_LOG_ERROR,
580                    "The specified picture size of %dx%d is not valid for the "
581                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
582                     s->width, s->height);
583             return -1;
584         }
585         s->out_format = FMT_H261;
586         avctx->delay  = 0;
587         s->low_delay  = 1;
588         break;
589     case AV_CODEC_ID_H263:
590         if (!CONFIG_H263_ENCODER)
591             return -1;
592         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
593                              s->width, s->height) == 8) {
594             av_log(avctx, AV_LOG_INFO,
595                    "The specified picture size of %dx%d is not valid for "
596                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
597                    "352x288, 704x576, and 1408x1152. "
598                    "Try H.263+.\n", s->width, s->height);
599             return -1;
600         }
601         s->out_format = FMT_H263;
602         avctx->delay  = 0;
603         s->low_delay  = 1;
604         break;
605     case AV_CODEC_ID_H263P:
606         s->out_format = FMT_H263;
607         s->h263_plus  = 1;
608         /* Fx */
609         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
610         s->modified_quant  = s->h263_aic;
611         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
612         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
613
614         /* /Fx */
615         /* These are just to be sure */
616         avctx->delay = 0;
617         s->low_delay = 1;
618         break;
619     case AV_CODEC_ID_FLV1:
620         s->out_format      = FMT_H263;
621         s->h263_flv        = 2; /* format = 1; 11-bit codes */
622         s->unrestricted_mv = 1;
623         s->rtp_mode  = 0; /* don't allow GOB */
624         avctx->delay = 0;
625         s->low_delay = 1;
626         break;
627     case AV_CODEC_ID_RV10:
628         s->out_format = FMT_H263;
629         avctx->delay  = 0;
630         s->low_delay  = 1;
631         break;
632     case AV_CODEC_ID_RV20:
633         s->out_format      = FMT_H263;
634         avctx->delay       = 0;
635         s->low_delay       = 1;
636         s->modified_quant  = 1;
637         s->h263_aic        = 1;
638         s->h263_plus       = 1;
639         s->loop_filter     = 1;
640         s->unrestricted_mv = 0;
641         break;
642     case AV_CODEC_ID_MPEG4:
643         s->out_format      = FMT_H263;
644         s->h263_pred       = 1;
645         s->unrestricted_mv = 1;
646         s->low_delay       = s->max_b_frames ? 0 : 1;
647         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
648         break;
649     case AV_CODEC_ID_MSMPEG4V2:
650         s->out_format      = FMT_H263;
651         s->h263_pred       = 1;
652         s->unrestricted_mv = 1;
653         s->msmpeg4_version = 2;
654         avctx->delay       = 0;
655         s->low_delay       = 1;
656         break;
657     case AV_CODEC_ID_MSMPEG4V3:
658         s->out_format        = FMT_H263;
659         s->h263_pred         = 1;
660         s->unrestricted_mv   = 1;
661         s->msmpeg4_version   = 3;
662         s->flipflop_rounding = 1;
663         avctx->delay         = 0;
664         s->low_delay         = 1;
665         break;
666     case AV_CODEC_ID_WMV1:
667         s->out_format        = FMT_H263;
668         s->h263_pred         = 1;
669         s->unrestricted_mv   = 1;
670         s->msmpeg4_version   = 4;
671         s->flipflop_rounding = 1;
672         avctx->delay         = 0;
673         s->low_delay         = 1;
674         break;
675     case AV_CODEC_ID_WMV2:
676         s->out_format        = FMT_H263;
677         s->h263_pred         = 1;
678         s->unrestricted_mv   = 1;
679         s->msmpeg4_version   = 5;
680         s->flipflop_rounding = 1;
681         avctx->delay         = 0;
682         s->low_delay         = 1;
683         break;
684     default:
685         return -1;
686     }
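    /* The per-codec setup above mainly selects the bitstream writer
     * (out_format), whether B-frames / low delay are usable, and a few
     * syntax-specific flags; everything below is common encoder init. */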
687
688     avctx->has_b_frames = !s->low_delay;
689
690     s->encoding = 1;
691
692     s->progressive_frame    =
693     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
694                                                 CODEC_FLAG_INTERLACED_ME) ||
695                                 s->alternate_scan);
696
697     /* init */
698     if (ff_MPV_common_init(s) < 0)
699         return -1;
700
701     if (ARCH_X86)
702         ff_MPV_encode_init_x86(s);
703
704     ff_fdctdsp_init(&s->fdsp, avctx);
705     ff_me_cmp_init(&s->mecc, avctx);
706     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
707     ff_pixblockdsp_init(&s->pdsp, avctx);
708     ff_qpeldsp_init(&s->qdsp);
709
710     s->avctx->coded_frame = s->current_picture.f;
711
712     if (s->msmpeg4_version) {
713         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
714                           2 * 2 * (MAX_LEVEL + 1) *
715                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
716     }
717     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
718
719     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
720     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
721     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
722     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
723     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
724                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
725     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
726                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
727
728     if (s->avctx->noise_reduction) {
729         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
730                           2 * 64 * sizeof(uint16_t), fail);
731     }
732
733     if (CONFIG_H263_ENCODER)
734         ff_h263dsp_init(&s->h263dsp);
735     if (!s->dct_quantize)
736         s->dct_quantize = ff_dct_quantize_c;
737     if (!s->denoise_dct)
738         s->denoise_dct  = denoise_dct_c;
739     s->fast_dct_quantize = s->dct_quantize;
740     if (avctx->trellis)
741         s->dct_quantize  = dct_quantize_trellis_c;
742
743     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
744         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
745
746     s->quant_precision = 5;
747
748     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
749     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
750
751     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
752         ff_h261_encode_init(s);
753     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
754         ff_h263_encode_init(s);
755     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
756         ff_msmpeg4_encode_init(s);
757     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
758         && s->out_format == FMT_MPEG1)
759         ff_mpeg1_encode_init(s);
760
761     /* init q matrix */
762     for (i = 0; i < 64; i++) {
763         int j = s->idsp.idct_permutation[i];
764         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
765             s->mpeg_quant) {
766             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
767             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
768         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
769             s->intra_matrix[j] =
770             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
771         } else {
772             /* mpeg1/2 */
773             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
774             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
775         }
776         if (s->avctx->intra_matrix)
777             s->intra_matrix[j] = s->avctx->intra_matrix[i];
778         if (s->avctx->inter_matrix)
779             s->inter_matrix[j] = s->avctx->inter_matrix[i];
780     }
781
782     /* precompute matrix */
783     /* for mjpeg, we do include qscale in the matrix */
784     if (s->out_format != FMT_MJPEG) {
785         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
786                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
787                           31, 1);
788         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
789                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
790                           31, 0);
791     }
792
793     if (ff_rate_control_init(s) < 0)
794         return -1;
795
796 #if FF_API_ERROR_RATE
797     FF_DISABLE_DEPRECATION_WARNINGS
798     if (avctx->error_rate)
799         s->error_rate = avctx->error_rate;
800     FF_ENABLE_DEPRECATION_WARNINGS;
801 #endif
802
803 #if FF_API_NORMALIZE_AQP
804     FF_DISABLE_DEPRECATION_WARNINGS
805     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
806         s->mpv_flags |= FF_MPV_FLAG_NAQ;
807     FF_ENABLE_DEPRECATION_WARNINGS;
808 #endif
809
810 #if FF_API_MV0
811     FF_DISABLE_DEPRECATION_WARNINGS
812     if (avctx->flags & CODEC_FLAG_MV0)
813         s->mpv_flags |= FF_MPV_FLAG_MV0;
814     FF_ENABLE_DEPRECATION_WARNINGS
815 #endif
816
817     if (avctx->b_frame_strategy == 2) {
818         for (i = 0; i < s->max_b_frames + 2; i++) {
819             s->tmp_frames[i] = av_frame_alloc();
820             if (!s->tmp_frames[i])
821                 return AVERROR(ENOMEM);
822
823             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
824             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
825             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
826
827             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
828             if (ret < 0)
829                 return ret;
830         }
831     }
832
833     return 0;
834 fail:
835     ff_MPV_encode_end(avctx);
836     return AVERROR_UNKNOWN;
837 }
838
839 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
840 {
841     MpegEncContext *s = avctx->priv_data;
842     int i;
843
844     ff_rate_control_uninit(s);
845
846     ff_MPV_common_end(s);
847     if (CONFIG_MJPEG_ENCODER &&
848         s->out_format == FMT_MJPEG)
849         ff_mjpeg_encode_close(s);
850
851     av_freep(&avctx->extradata);
852
853     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
854         av_frame_free(&s->tmp_frames[i]);
855
856     ff_free_picture_tables(&s->new_picture);
857     ff_mpeg_unref_picture(s, &s->new_picture);
858
859     av_freep(&s->avctx->stats_out);
860     av_freep(&s->ac_stats);
861
862     av_freep(&s->q_intra_matrix);
863     av_freep(&s->q_inter_matrix);
864     av_freep(&s->q_intra_matrix16);
865     av_freep(&s->q_inter_matrix16);
866     av_freep(&s->input_picture);
867     av_freep(&s->reordered_input_picture);
868     av_freep(&s->dct_offset);
869
870     return 0;
871 }
872
873 static int get_sae(uint8_t *src, int ref, int stride)
874 {
875     int x,y;
876     int acc = 0;
877
878     for (y = 0; y < 16; y++) {
879         for (x = 0; x < 16; x++) {
880             acc += FFABS(src[x + y * stride] - ref);
881         }
882     }
883
884     return acc;
885 }
886
887 static int get_intra_count(MpegEncContext *s, uint8_t *src,
888                            uint8_t *ref, int stride)
889 {
890     int x, y, w, h;
891     int acc = 0;
892
893     w = s->width  & ~15;
894     h = s->height & ~15;
895
896     for (y = 0; y < h; y += 16) {
897         for (x = 0; x < w; x += 16) {
898             int offset = x + y * stride;
899             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
900                                       stride, 16);
901             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
902             int sae  = get_sae(src + offset, mean, stride);
903
904             acc += sae + 500 < sad;
905         }
906     }
907     return acc;
908 }
909
910
911 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
912 {
913     Picture *pic = NULL;
914     int64_t pts;
915     int i, display_picture_number = 0, ret;
916     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
917                                                  (s->low_delay ? 0 : 1);
918     int direct = 1;
919
920     if (pic_arg) {
921         pts = pic_arg->pts;
922         display_picture_number = s->input_picture_number++;
923
924         if (pts != AV_NOPTS_VALUE) {
925             if (s->user_specified_pts != AV_NOPTS_VALUE) {
926                 int64_t time = pts;
927                 int64_t last = s->user_specified_pts;
928
929                 if (time <= last) {
930                     av_log(s->avctx, AV_LOG_ERROR,
931                            "Error, Invalid timestamp=%"PRId64", "
932                            "last=%"PRId64"\n", pts, s->user_specified_pts);
933                     return -1;
934                 }
935
936                 if (!s->low_delay && display_picture_number == 1)
937                     s->dts_delta = time - last;
938             }
939             s->user_specified_pts = pts;
940         } else {
941             if (s->user_specified_pts != AV_NOPTS_VALUE) {
942                 s->user_specified_pts =
943                 pts = s->user_specified_pts + 1;
944                 av_log(s->avctx, AV_LOG_INFO,
945                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
946                        pts);
947             } else {
948                 pts = display_picture_number;
949             }
950         }
951     }
952
953     if (pic_arg) {
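        /* "direct" means the caller's buffer can be referenced in place; a
         * missing buffer or any stride mismatch forces a copy into an
         * internally allocated picture below. */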
954         if (!pic_arg->buf[0])
955             direct = 0;
956         if (pic_arg->linesize[0] != s->linesize)
957             direct = 0;
958         if (pic_arg->linesize[1] != s->uvlinesize)
959             direct = 0;
960         if (pic_arg->linesize[2] != s->uvlinesize)
961             direct = 0;
962
963         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
964                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
965
966         if (direct) {
967             i = ff_find_unused_picture(s, 1);
968             if (i < 0)
969                 return i;
970
971             pic = &s->picture[i];
972             pic->reference = 3;
973
974             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
975                 return ret;
976             if (ff_alloc_picture(s, pic, 1) < 0) {
977                 return -1;
978             }
979         } else {
980             i = ff_find_unused_picture(s, 0);
981             if (i < 0)
982                 return i;
983
984             pic = &s->picture[i];
985             pic->reference = 3;
986
987             if (ff_alloc_picture(s, pic, 0) < 0) {
988                 return -1;
989             }
990
991             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
992                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
993                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
994                 // empty
995             } else {
996                 int h_chroma_shift, v_chroma_shift;
997                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
998                                                  &h_chroma_shift,
999                                                  &v_chroma_shift);
1000
1001                 for (i = 0; i < 3; i++) {
1002                     int src_stride = pic_arg->linesize[i];
1003                     int dst_stride = i ? s->uvlinesize : s->linesize;
1004                     int h_shift = i ? h_chroma_shift : 0;
1005                     int v_shift = i ? v_chroma_shift : 0;
1006                     int w = s->width  >> h_shift;
1007                     int h = s->height >> v_shift;
1008                     uint8_t *src = pic_arg->data[i];
1009                     uint8_t *dst = pic->f->data[i];
1010
1011                     if (!s->avctx->rc_buffer_size)
1012                         dst += INPLACE_OFFSET;
1013
1014                     if (src_stride == dst_stride)
1015                         memcpy(dst, src, src_stride * h);
1016                     else {
1017                         while (h--) {
1018                             memcpy(dst, src, w);
1019                             dst += dst_stride;
1020                             src += src_stride;
1021                         }
1022                     }
1023                 }
1024             }
1025         }
1026         ret = av_frame_copy_props(pic->f, pic_arg);
1027         if (ret < 0)
1028             return ret;
1029
1030         pic->f->display_picture_number = display_picture_number;
1031         pic->f->pts = pts; // we set this here to avoid modifying pic_arg
1032     }
1033
1034     /* shift buffer entries */
1035     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1036         s->input_picture[i - 1] = s->input_picture[i];
1037
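    /* Park the new frame encoding_delay slots from the front so that enough
     * future input is buffered before B-frame/reference decisions are made. */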
1038     s->input_picture[encoding_delay] = (Picture*) pic;
1039
1040     return 0;
1041 }
1042
1043 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1044 {
1045     int x, y, plane;
1046     int score = 0;
1047     int64_t score64 = 0;
1048
1049     for (plane = 0; plane < 3; plane++) {
1050         const int stride = p->f->linesize[plane];
1051         const int bw = plane ? 1 : 2;
1052         for (y = 0; y < s->mb_height * bw; y++) {
1053             for (x = 0; x < s->mb_width * bw; x++) {
1054                 int off = p->shared ? 0 : 16;
1055                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1056                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1057                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1058
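                /* frame_skip_exp selects the norm used to judge how close the
                 * frame is to the reference: 0 = maximum block error, 1 = sum
                 * of block errors, 2 = sum of squares, 3/4 = higher powers
                 * that penalize large local errors more heavily. */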
1059                 switch (s->avctx->frame_skip_exp) {
1060                 case 0: score    =  FFMAX(score, v);          break;
1061                 case 1: score   += FFABS(v);                  break;
1062                 case 2: score   += v * v;                     break;
1063                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1064                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1065                 }
1066             }
1067         }
1068     }
1069
1070     if (score)
1071         score64 = score;
1072
1073     if (score64 < s->avctx->frame_skip_threshold)
1074         return 1;
1075     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1076         return 1;
1077     return 0;
1078 }
1079
1080 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1081 {
1082     AVPacket pkt = { 0 };
1083     int ret, got_output;
1084
1085     av_init_packet(&pkt);
1086     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1087     if (ret < 0)
1088         return ret;
1089
1090     ret = pkt.size;
1091     av_free_packet(&pkt);
1092     return ret;
1093 }
1094
1095 static int estimate_best_b_count(MpegEncContext *s)
1096 {
1097     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1098     AVCodecContext *c = avcodec_alloc_context3(NULL);
1099     const int scale = s->avctx->brd_scale;
1100     int i, j, out_size, p_lambda, b_lambda, lambda2;
1101     int64_t best_rd  = INT64_MAX;
1102     int best_b_count = -1;
1103
1104     assert(scale >= 0 && scale <= 3);
1105
1106     //emms_c();
1107     //s->next_picture_ptr->quality;
1108     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1109     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1110     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1111     if (!b_lambda) // FIXME we should do this somewhere else
1112         b_lambda = p_lambda;
1113     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1114                FF_LAMBDA_SHIFT;
1115
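    /* Re-encode the buffered input frames at reduced resolution (brd_scale)
     * for each candidate number of consecutive B-frames and keep the count
     * with the lowest cost, where cost is roughly bits * lambda2 plus the
     * reconstruction error reported by the scratch encoder. */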
1116     c->width        = s->width  >> scale;
1117     c->height       = s->height >> scale;
1118     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1119     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1120     c->mb_decision  = s->avctx->mb_decision;
1121     c->me_cmp       = s->avctx->me_cmp;
1122     c->mb_cmp       = s->avctx->mb_cmp;
1123     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1124     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1125     c->time_base    = s->avctx->time_base;
1126     c->max_b_frames = s->max_b_frames;
1127
1128     if (avcodec_open2(c, codec, NULL) < 0)
1129         return -1;
1130
1131     for (i = 0; i < s->max_b_frames + 2; i++) {
1132         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1133                                                 s->next_picture_ptr;
1134
1135         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1136             pre_input = *pre_input_ptr;
1137
1138             if (!pre_input.shared && i) {
1139                 pre_input.f->data[0] += INPLACE_OFFSET;
1140                 pre_input.f->data[1] += INPLACE_OFFSET;
1141                 pre_input.f->data[2] += INPLACE_OFFSET;
1142             }
1143
1144             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1145                                        s->tmp_frames[i]->linesize[0],
1146                                        pre_input.f->data[0],
1147                                        pre_input.f->linesize[0],
1148                                        c->width, c->height);
1149             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1150                                        s->tmp_frames[i]->linesize[1],
1151                                        pre_input.f->data[1],
1152                                        pre_input.f->linesize[1],
1153                                        c->width >> 1, c->height >> 1);
1154             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1155                                        s->tmp_frames[i]->linesize[2],
1156                                        pre_input.f->data[2],
1157                                        pre_input.f->linesize[2],
1158                                        c->width >> 1, c->height >> 1);
1159         }
1160     }
1161
1162     for (j = 0; j < s->max_b_frames + 1; j++) {
1163         int64_t rd = 0;
1164
1165         if (!s->input_picture[j])
1166             break;
1167
1168         c->error[0] = c->error[1] = c->error[2] = 0;
1169
1170         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1171         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1172
1173         out_size = encode_frame(c, s->tmp_frames[0]);
1174
1175         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1176
1177         for (i = 0; i < s->max_b_frames + 1; i++) {
1178             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1179
1180             s->tmp_frames[i + 1]->pict_type = is_p ?
1181                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1182             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1183
1184             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1185
1186             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1187         }
1188
1189         /* get the delayed frames */
1190         while (out_size) {
1191             out_size = encode_frame(c, NULL);
1192             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1193         }
1194
1195         rd += c->error[0] + c->error[1] + c->error[2];
1196
1197         if (rd < best_rd) {
1198             best_rd = rd;
1199             best_b_count = j;
1200         }
1201     }
1202
1203     avcodec_close(c);
1204     av_freep(&c);
1205
1206     return best_b_count;
1207 }
1208
1209 static int select_input_picture(MpegEncContext *s)
1210 {
1211     int i, ret;
1212
1213     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1214         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1215     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1216
1217     /* set next picture type & ordering */
1218     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1219         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1220             s->next_picture_ptr == NULL || s->intra_only) {
1221             s->reordered_input_picture[0] = s->input_picture[0];
1222             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1223             s->reordered_input_picture[0]->f->coded_picture_number =
1224                 s->coded_picture_number++;
1225         } else {
1226             int b_frames;
1227
1228             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1229                 if (s->picture_in_gop_number < s->gop_size &&
1230                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1231                     // FIXME check that the gop check above is +-1 correct
1232                     av_frame_unref(s->input_picture[0]->f);
1233
1234                     emms_c();
1235                     ff_vbv_update(s, 0);
1236
1237                     goto no_output_pic;
1238                 }
1239             }
1240
1241             if (s->flags & CODEC_FLAG_PASS2) {
1242                 for (i = 0; i < s->max_b_frames + 1; i++) {
1243                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1244
1245                     if (pict_num >= s->rc_context.num_entries)
1246                         break;
1247                     if (!s->input_picture[i]) {
1248                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1249                         break;
1250                     }
1251
1252                     s->input_picture[i]->f->pict_type =
1253                         s->rc_context.entry[pict_num].new_pict_type;
1254                 }
1255             }
1256
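            /* Decide how many B-frames precede the next reference frame:
             * strategy 0 just uses max_b_frames, strategy 1 uses the
             * intra-block heuristic from get_intra_count(), and strategy 2
             * runs the brute-force search in estimate_best_b_count(). */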
1257             if (s->avctx->b_frame_strategy == 0) {
1258                 b_frames = s->max_b_frames;
1259                 while (b_frames && !s->input_picture[b_frames])
1260                     b_frames--;
1261             } else if (s->avctx->b_frame_strategy == 1) {
1262                 for (i = 1; i < s->max_b_frames + 1; i++) {
1263                     if (s->input_picture[i] &&
1264                         s->input_picture[i]->b_frame_score == 0) {
1265                         s->input_picture[i]->b_frame_score =
1266                             get_intra_count(s,
1267                                             s->input_picture[i    ]->f->data[0],
1268                                             s->input_picture[i - 1]->f->data[0],
1269                                             s->linesize) + 1;
1270                     }
1271                 }
1272                 for (i = 0; i < s->max_b_frames + 1; i++) {
1273                     if (s->input_picture[i] == NULL ||
1274                         s->input_picture[i]->b_frame_score - 1 >
1275                             s->mb_num / s->avctx->b_sensitivity)
1276                         break;
1277                 }
1278
1279                 b_frames = FFMAX(0, i - 1);
1280
1281                 /* reset scores */
1282                 for (i = 0; i < b_frames + 1; i++) {
1283                     s->input_picture[i]->b_frame_score = 0;
1284                 }
1285             } else if (s->avctx->b_frame_strategy == 2) {
1286                 b_frames = estimate_best_b_count(s);
1287             } else {
1288                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1289                 b_frames = 0;
1290             }
1291
1292             emms_c();
1293
1294             for (i = b_frames - 1; i >= 0; i--) {
1295                 int type = s->input_picture[i]->f->pict_type;
1296                 if (type && type != AV_PICTURE_TYPE_B)
1297                     b_frames = i;
1298             }
1299             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1300                 b_frames == s->max_b_frames) {
1301                 av_log(s->avctx, AV_LOG_ERROR,
1302                        "warning, too many b frames in a row\n");
1303             }
1304
1305             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1306                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1307                     s->gop_size > s->picture_in_gop_number) {
1308                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1309                 } else {
1310                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1311                         b_frames = 0;
1312                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1313                 }
1314             }
1315
1316             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1317                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1318                 b_frames--;
1319
1320             s->reordered_input_picture[0] = s->input_picture[b_frames];
1321             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1322                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1323             s->reordered_input_picture[0]->f->coded_picture_number =
1324                 s->coded_picture_number++;
1325             for (i = 0; i < b_frames; i++) {
1326                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1327                 s->reordered_input_picture[i + 1]->f->pict_type =
1328                     AV_PICTURE_TYPE_B;
1329                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1330                     s->coded_picture_number++;
1331             }
1332         }
1333     }
1334 no_output_pic:
1335     if (s->reordered_input_picture[0]) {
1336         s->reordered_input_picture[0]->reference =
1337            s->reordered_input_picture[0]->f->pict_type !=
1338                AV_PICTURE_TYPE_B ? 3 : 0;
1339
1340         ff_mpeg_unref_picture(s, &s->new_picture);
1341         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1342             return ret;
1343
1344         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1345             // input is a shared pix, so we can't modify it -> alloc a new
1346             // one & ensure that the shared one is reusable
1347
1348             Picture *pic;
1349             int i = ff_find_unused_picture(s, 0);
1350             if (i < 0)
1351                 return i;
1352             pic = &s->picture[i];
1353
1354             pic->reference = s->reordered_input_picture[0]->reference;
1355             if (ff_alloc_picture(s, pic, 0) < 0) {
1356                 return -1;
1357             }
1358
1359             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1360             if (ret < 0)
1361                 return ret;
1362
1363             /* mark us unused / free shared pic */
1364             av_frame_unref(s->reordered_input_picture[0]->f);
1365             s->reordered_input_picture[0]->shared = 0;
1366
1367             s->current_picture_ptr = pic;
1368         } else {
1369             // input is not a shared pix -> reuse buffer for current_pix
1370             s->current_picture_ptr = s->reordered_input_picture[0];
1371             for (i = 0; i < 4; i++) {
1372                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1373             }
1374         }
1375         ff_mpeg_unref_picture(s, &s->current_picture);
1376         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1377                                        s->current_picture_ptr)) < 0)
1378             return ret;
1379
1380         s->picture_number = s->new_picture.f->display_picture_number;
1381     } else {
1382         ff_mpeg_unref_picture(s, &s->new_picture);
1383     }
1384     return 0;
1385 }
1386
1387 static void frame_end(MpegEncContext *s)
1388 {
1389     int i;
1390
1391     if (s->unrestricted_mv &&
1392         s->current_picture.reference &&
1393         !s->intra_only) {
1394         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1395         int hshift = desc->log2_chroma_w;
1396         int vshift = desc->log2_chroma_h;
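        /* Pad the borders of the reconstructed frame so that motion vectors
         * pointing outside the picture (unrestricted MV) read well-defined
         * samples during later motion estimation/compensation. */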
1397         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1398                                 s->h_edge_pos, s->v_edge_pos,
1399                                 EDGE_WIDTH, EDGE_WIDTH,
1400                                 EDGE_TOP | EDGE_BOTTOM);
1401         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1402                                 s->h_edge_pos >> hshift,
1403                                 s->v_edge_pos >> vshift,
1404                                 EDGE_WIDTH >> hshift,
1405                                 EDGE_WIDTH >> vshift,
1406                                 EDGE_TOP | EDGE_BOTTOM);
1407         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1408                                 s->h_edge_pos >> hshift,
1409                                 s->v_edge_pos >> vshift,
1410                                 EDGE_WIDTH >> hshift,
1411                                 EDGE_WIDTH >> vshift,
1412                                 EDGE_TOP | EDGE_BOTTOM);
1413     }
1414
1415     emms_c();
1416
1417     s->last_pict_type                 = s->pict_type;
1418     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1419     if (s->pict_type!= AV_PICTURE_TYPE_B)
1420         s->last_non_b_pict_type = s->pict_type;
1421
1422     if (s->encoding) {
1423         /* release non-reference frames */
1424         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1425             if (!s->picture[i].reference)
1426                 ff_mpeg_unref_picture(s, &s->picture[i]);
1427         }
1428     }
1429
1430     s->avctx->coded_frame = s->current_picture_ptr->f;
1431
1432 }
1433
1434 static void update_noise_reduction(MpegEncContext *s)
1435 {
1436     int intra, i;
1437
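    /* Turn the accumulated DCT error statistics (kept separately for intra
     * and inter blocks) into per-coefficient offsets for denoise_dct();
     * the counters are halved once they grow large so the statistics keep
     * adapting to recent content. */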
1438     for (intra = 0; intra < 2; intra++) {
1439         if (s->dct_count[intra] > (1 << 16)) {
1440             for (i = 0; i < 64; i++) {
1441                 s->dct_error_sum[intra][i] >>= 1;
1442             }
1443             s->dct_count[intra] >>= 1;
1444         }
1445
1446         for (i = 0; i < 64; i++) {
1447             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1448                                        s->dct_count[intra] +
1449                                        s->dct_error_sum[intra][i] / 2) /
1450                                       (s->dct_error_sum[intra][i] + 1);
1451         }
1452     }
1453 }
1454
1455 static int frame_start(MpegEncContext *s)
1456 {
1457     int ret;
1458
1459     /* mark & release old frames */
1460     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1461         s->last_picture_ptr != s->next_picture_ptr &&
1462         s->last_picture_ptr->f->buf[0]) {
1463         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1464     }
1465
1466     s->current_picture_ptr->f->pict_type = s->pict_type;
1467     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1468
1469     ff_mpeg_unref_picture(s, &s->current_picture);
1470     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1471                                    s->current_picture_ptr)) < 0)
1472         return ret;
1473
1474     if (s->pict_type != AV_PICTURE_TYPE_B) {
1475         s->last_picture_ptr = s->next_picture_ptr;
1476         if (!s->droppable)
1477             s->next_picture_ptr = s->current_picture_ptr;
1478     }
1479
1480     if (s->last_picture_ptr) {
1481         ff_mpeg_unref_picture(s, &s->last_picture);
1482         if (s->last_picture_ptr->f->buf[0] &&
1483             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1484                                        s->last_picture_ptr)) < 0)
1485             return ret;
1486     }
1487     if (s->next_picture_ptr) {
1488         ff_mpeg_unref_picture(s, &s->next_picture);
1489         if (s->next_picture_ptr->f->buf[0] &&
1490             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1491                                        s->next_picture_ptr)) < 0)
1492             return ret;
1493     }
1494
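         /* Field pictures: encode each field as a half-height frame by offsetting
          * the bottom field one line down and doubling all line sizes. */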
1495     if (s->picture_structure != PICT_FRAME) {
1496         int i;
1497         for (i = 0; i < 4; i++) {
1498             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1499                 s->current_picture.f->data[i] +=
1500                     s->current_picture.f->linesize[i];
1501             }
1502             s->current_picture.f->linesize[i] *= 2;
1503             s->last_picture.f->linesize[i]    *= 2;
1504             s->next_picture.f->linesize[i]    *= 2;
1505         }
1506     }
1507
1508     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1509         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1510         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1511     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1512         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1513         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1514     } else {
1515         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1516         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1517     }
1518
1519     if (s->dct_error_sum) {
1520         assert(s->avctx->noise_reduction && s->encoding);
1521         update_noise_reduction(s);
1522     }
1523
1524     return 0;
1525 }
1526
1527 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1528                           const AVFrame *pic_arg, int *got_packet)
1529 {
1530     MpegEncContext *s = avctx->priv_data;
1531     int i, stuffing_count, ret;
1532     int context_count = s->slice_context_count;
1533
1534     s->picture_in_gop_number++;
1535
1536     if (load_input_picture(s, pic_arg) < 0)
1537         return -1;
1538
1539     if (select_input_picture(s) < 0) {
1540         return -1;
1541     }
1542
1543     /* output? */
1544     if (s->new_picture.f->data[0]) {
1545         if (!pkt->data &&
1546             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1547             return ret;
1548         if (s->mb_info) {
1549             s->mb_info_ptr = av_packet_new_side_data(pkt,
1550                                  AV_PKT_DATA_H263_MB_INFO,
1551                                  s->mb_width*s->mb_height*12);
1552             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1553         }
1554
1555         for (i = 0; i < context_count; i++) {
1556             int start_y = s->thread_context[i]->start_mb_y;
1557             int   end_y = s->thread_context[i]->  end_mb_y;
1558             int h       = s->mb_height;
1559             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1560             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1561
1562             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1563         }
1564
1565         s->pict_type = s->new_picture.f->pict_type;
1566         //emms_c();
1567         ret = frame_start(s);
1568         if (ret < 0)
1569             return ret;
1570 vbv_retry:
1571         if (encode_picture(s, s->picture_number) < 0)
1572             return -1;
1573
1574         avctx->header_bits = s->header_bits;
1575         avctx->mv_bits     = s->mv_bits;
1576         avctx->misc_bits   = s->misc_bits;
1577         avctx->i_tex_bits  = s->i_tex_bits;
1578         avctx->p_tex_bits  = s->p_tex_bits;
1579         avctx->i_count     = s->i_count;
1580         // FIXME f/b_count in avctx
1581         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1582         avctx->skip_count  = s->skip_count;
1583
1584         frame_end(s);
1585
1586         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1587             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1588
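             /* VBV check: if the frame came out larger than the rate control allows,
              * raise lambda (and the per-MB lambda table) and re-encode the whole
              * frame from the vbv_retry label above. */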
1589         if (avctx->rc_buffer_size) {
1590             RateControlContext *rcc = &s->rc_context;
1591             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1592
1593             if (put_bits_count(&s->pb) > max_size &&
1594                 s->lambda < s->avctx->lmax) {
1595                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1596                                        (s->qscale + 1) / s->qscale);
1597                 if (s->adaptive_quant) {
1598                     int i;
1599                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1600                         s->lambda_table[i] =
1601                             FFMAX(s->lambda_table[i] + 1,
1602                                   s->lambda_table[i] * (s->qscale + 1) /
1603                                   s->qscale);
1604                 }
1605                 s->mb_skipped = 0;        // done in frame_start()
1606                 // these were changed in encode_picture(), so undo them before retrying
1607                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1608                     if (s->flipflop_rounding          ||
1609                         s->codec_id == AV_CODEC_ID_H263P ||
1610                         s->codec_id == AV_CODEC_ID_MPEG4)
1611                         s->no_rounding ^= 1;
1612                 }
1613                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1614                     s->time_base       = s->last_time_base;
1615                     s->last_non_b_time = s->time - s->pp_time;
1616                 }
1617                 for (i = 0; i < context_count; i++) {
1618                     PutBitContext *pb = &s->thread_context[i]->pb;
1619                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1620                 }
1621                 goto vbv_retry;
1622             }
1623
1624             assert(s->avctx->rc_max_rate);
1625         }
1626
1627         if (s->flags & CODEC_FLAG_PASS1)
1628             ff_write_pass1_stats(s);
1629
1630         for (i = 0; i < 4; i++) {
1631             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1632             avctx->error[i] += s->current_picture_ptr->f->error[i];
1633         }
1634
1635         if (s->flags & CODEC_FLAG_PASS1)
1636             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1637                    avctx->i_tex_bits + avctx->p_tex_bits ==
1638                        put_bits_count(&s->pb));
1639         flush_put_bits(&s->pb);
1640         s->frame_bits  = put_bits_count(&s->pb);
1641
1642         stuffing_count = ff_vbv_update(s, s->frame_bits);
1643         if (stuffing_count) {
1644             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1645                     stuffing_count + 50) {
1646                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1647                 return -1;
1648             }
1649
1650             switch (s->codec_id) {
1651             case AV_CODEC_ID_MPEG1VIDEO:
1652             case AV_CODEC_ID_MPEG2VIDEO:
1653                 while (stuffing_count--) {
1654                     put_bits(&s->pb, 8, 0);
1655                 }
1656             break;
1657             case AV_CODEC_ID_MPEG4:
1658                 put_bits(&s->pb, 16, 0);
1659                 put_bits(&s->pb, 16, 0x1C3);
1660                 stuffing_count -= 4;
1661                 while (stuffing_count--) {
1662                     put_bits(&s->pb, 8, 0xFF);
1663                 }
1664             break;
1665             default:
1666                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1667             }
1668             flush_put_bits(&s->pb);
1669             s->frame_bits  = put_bits_count(&s->pb);
1670         }
1671
1672         /* update mpeg1/2 vbv_delay for CBR */
1673         if (s->avctx->rc_max_rate                          &&
1674             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1675             s->out_format == FMT_MPEG1                     &&
1676             90000LL * (avctx->rc_buffer_size - 1) <=
1677                 s->avctx->rc_max_rate * 0xFFFFLL) {
1678             int vbv_delay, min_delay;
1679             double inbits  = s->avctx->rc_max_rate *
1680                              av_q2d(s->avctx->time_base);
1681             int    minbits = s->frame_bits - 8 *
1682                              (s->vbv_delay_ptr - s->pb.buf - 1);
1683             double bits    = s->rc_context.buffer_index + minbits - inbits;
1684
1685             if (bits < 0)
1686                 av_log(s->avctx, AV_LOG_ERROR,
1687                        "Internal error, negative bits\n");
1688
1689             assert(s->repeat_first_field == 0);
1690
1691             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1692             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1693                         s->avctx->rc_max_rate;
1694
1695             vbv_delay = FFMAX(vbv_delay, min_delay);
1696
1697             assert(vbv_delay < 0xFFFF);
1698
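             /* Patch the 16-bit vbv_delay into the already written picture header:
              * 3 bits go into byte 0, 8 into byte 1 and 5 into the top of byte 2.
              * avctx->vbv_delay is in 27 MHz units, hence the * 300. */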
1699             s->vbv_delay_ptr[0] &= 0xF8;
1700             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1701             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1702             s->vbv_delay_ptr[2] &= 0x07;
1703             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1704             avctx->vbv_delay     = vbv_delay * 300;
1705         }
1706         s->total_bits     += s->frame_bits;
1707         avctx->frame_bits  = s->frame_bits;
1708
1709         pkt->pts = s->current_picture.f->pts;
1710         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1711             if (!s->current_picture.f->coded_picture_number)
1712                 pkt->dts = pkt->pts - s->dts_delta;
1713             else
1714                 pkt->dts = s->reordered_pts;
1715             s->reordered_pts = pkt->pts;
1716         } else
1717             pkt->dts = pkt->pts;
1718         if (s->current_picture.f->key_frame)
1719             pkt->flags |= AV_PKT_FLAG_KEY;
1720         if (s->mb_info)
1721             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1722     } else {
1723         s->frame_bits = 0;
1724     }
1725     assert((s->frame_bits & 7) == 0);
1726
1727     pkt->size = s->frame_bits / 8;
1728     *got_packet = !!pkt->size;
1729     return 0;
1730 }
1731
1732 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1733                                                 int n, int threshold)
1734 {
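     /* Heuristic: a block that contains only a few isolated +-1 coefficients is
      * zeroed entirely. Each such coefficient costs tab[run] (run = preceding zero
      * run); if the total score stays below the threshold, the block is eliminated.
      * A negative threshold additionally allows the DC coefficient to be dropped. */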
1735     static const char tab[64] = {
1736         3, 2, 2, 1, 1, 1, 1, 1,
1737         1, 1, 1, 1, 1, 1, 1, 1,
1738         1, 1, 1, 1, 1, 1, 1, 1,
1739         0, 0, 0, 0, 0, 0, 0, 0,
1740         0, 0, 0, 0, 0, 0, 0, 0,
1741         0, 0, 0, 0, 0, 0, 0, 0,
1742         0, 0, 0, 0, 0, 0, 0, 0,
1743         0, 0, 0, 0, 0, 0, 0, 0
1744     };
1745     int score = 0;
1746     int run = 0;
1747     int i;
1748     int16_t *block = s->block[n];
1749     const int last_index = s->block_last_index[n];
1750     int skip_dc;
1751
1752     if (threshold < 0) {
1753         skip_dc = 0;
1754         threshold = -threshold;
1755     } else
1756         skip_dc = 1;
1757
1758     /* Is everything we could set to zero already zero? */
1759     if (last_index <= skip_dc - 1)
1760         return;
1761
1762     for (i = 0; i <= last_index; i++) {
1763         const int j = s->intra_scantable.permutated[i];
1764         const int level = FFABS(block[j]);
1765         if (level == 1) {
1766             if (skip_dc && i == 0)
1767                 continue;
1768             score += tab[run];
1769             run = 0;
1770         } else if (level > 1) {
1771             return;
1772         } else {
1773             run++;
1774         }
1775     }
1776     if (score >= threshold)
1777         return;
1778     for (i = skip_dc; i <= last_index; i++) {
1779         const int j = s->intra_scantable.permutated[i];
1780         block[j] = 0;
1781     }
1782     if (block[0])
1783         s->block_last_index[n] = 0;
1784     else
1785         s->block_last_index[n] = -1;
1786 }
1787
1788 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1789                                int last_index)
1790 {
1791     int i;
1792     const int maxlevel = s->max_qcoeff;
1793     const int minlevel = s->min_qcoeff;
1794     int overflow = 0;
1795
1796     if (s->mb_intra) {
1797         i = 1; // skip clipping of intra dc
1798     } else
1799         i = 0;
1800
1801     for (; i <= last_index; i++) {
1802         const int j = s->intra_scantable.permutated[i];
1803         int level = block[j];
1804
1805         if (level > maxlevel) {
1806             level = maxlevel;
1807             overflow++;
1808         } else if (level < minlevel) {
1809             level = minlevel;
1810             overflow++;
1811         }
1812
1813         block[j] = level;
1814     }
1815
1816     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1817         av_log(s->avctx, AV_LOG_INFO,
1818                "warning, clipping %d dct coefficients to %d..%d\n",
1819                overflow, minlevel, maxlevel);
1820 }
1821
1822 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
1823 {
1824     int x, y;
1825     // FIXME optimize
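         /* weight ~ 36 * standard deviation of a 3x3 neighbourhood clipped at the
          * block border (count * sqr - sum * sum == count^2 * variance); how the
          * weights steer the quantization is decided in dct_quantize_refine(). */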
1826     for (y = 0; y < 8; y++) {
1827         for (x = 0; x < 8; x++) {
1828             int x2, y2;
1829             int sum = 0;
1830             int sqr = 0;
1831             int count = 0;
1832
1833             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
1834                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
1835                     int v = ptr[x2 + y2 * stride];
1836                     sum += v;
1837                     sqr += v * v;
1838                     count++;
1839                 }
1840             }
1841             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
1842         }
1843     }
1844 }
1845
1846 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1847                                                 int motion_x, int motion_y,
1848                                                 int mb_block_height,
1849                                                 int mb_block_count)
1850 {
1851     int16_t weight[8][64];
1852     int16_t orig[8][64];
1853     const int mb_x = s->mb_x;
1854     const int mb_y = s->mb_y;
1855     int i;
1856     int skip_dct[8];
1857     int dct_offset = s->linesize * 8; // default for progressive frames
1858     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1859     ptrdiff_t wrap_y, wrap_c;
1860
1861     for (i = 0; i < mb_block_count; i++)
1862         skip_dct[i] = s->skipdct;
1863
1864     if (s->adaptive_quant) {
1865         const int last_qp = s->qscale;
1866         const int mb_xy = mb_x + mb_y * s->mb_stride;
1867
1868         s->lambda = s->lambda_table[mb_xy];
1869         update_qscale(s);
1870
1871         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1872             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1873             s->dquant = s->qscale - last_qp;
1874
1875             if (s->out_format == FMT_H263) {
1876                 s->dquant = av_clip(s->dquant, -2, 2);
1877
1878                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1879                     if (!s->mb_intra) {
1880                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1881                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1882                                 s->dquant = 0;
1883                         }
1884                         if (s->mv_type == MV_TYPE_8X8)
1885                             s->dquant = 0;
1886                     }
1887                 }
1888             }
1889         }
1890         ff_set_qscale(s, last_qp + s->dquant);
1891     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1892         ff_set_qscale(s, s->qscale + s->dquant);
1893
1894     wrap_y = s->linesize;
1895     wrap_c = s->uvlinesize;
1896     ptr_y  = s->new_picture.f->data[0] +
1897              (mb_y * 16 * wrap_y)              + mb_x * 16;
1898     ptr_cb = s->new_picture.f->data[1] +
1899              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1900     ptr_cr = s->new_picture.f->data[2] +
1901              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1902
1903     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1904         uint8_t *ebuf = s->edge_emu_buffer + 32;
1905         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1906                                  wrap_y, wrap_y,
1907                                  16, 16, mb_x * 16, mb_y * 16,
1908                                  s->width, s->height);
1909         ptr_y = ebuf;
1910         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1911                                  wrap_c, wrap_c,
1912                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1913                                  s->width >> 1, s->height >> 1);
1914         ptr_cb = ebuf + 18 * wrap_y;
1915         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1916                                  wrap_c, wrap_c,
1917                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1918                                  s->width >> 1, s->height >> 1);
1919         ptr_cr = ebuf + 18 * wrap_y + 8;
1920     }
1921
1922     if (s->mb_intra) {
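         /* Interlaced DCT decision: compare frame ordering against field ordering
          * (stride doubled) with ildct_cmp, biased by 400 in favour of frame DCT;
          * if fields win, blocks 2/3 are pointed at the second field. */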
1923         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1924             int progressive_score, interlaced_score;
1925
1926             s->interlaced_dct = 0;
1927             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
1928                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1929                                                      NULL, wrap_y, 8) - 400;
1930
1931             if (progressive_score > 0) {
1932                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
1933                                                         NULL, wrap_y * 2, 8) +
1934                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
1935                                                         NULL, wrap_y * 2, 8);
1936                 if (progressive_score > interlaced_score) {
1937                     s->interlaced_dct = 1;
1938
1939                     dct_offset = wrap_y;
1940                     wrap_y <<= 1;
1941                     if (s->chroma_format == CHROMA_422)
1942                         wrap_c <<= 1;
1943                 }
1944             }
1945         }
1946
1947         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
1948         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
1949         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
1950         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
1951
1952         if (s->flags & CODEC_FLAG_GRAY) {
1953             skip_dct[4] = 1;
1954             skip_dct[5] = 1;
1955         } else {
1956             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1957             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1958             if (!s->chroma_y_shift) { /* 422 */
1959                 s->pdsp.get_pixels(s->block[6],
1960                                    ptr_cb + (dct_offset >> 1), wrap_c);
1961                 s->pdsp.get_pixels(s->block[7],
1962                                    ptr_cr + (dct_offset >> 1), wrap_c);
1963             }
1964         }
1965     } else {
1966         op_pixels_func (*op_pix)[4];
1967         qpel_mc_func (*op_qpix)[16];
1968         uint8_t *dest_y, *dest_cb, *dest_cr;
1969
1970         dest_y  = s->dest[0];
1971         dest_cb = s->dest[1];
1972         dest_cr = s->dest[2];
1973
1974         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1975             op_pix  = s->hdsp.put_pixels_tab;
1976             op_qpix = s->qdsp.put_qpel_pixels_tab;
1977         } else {
1978             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1979             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
1980         }
1981
1982         if (s->mv_dir & MV_DIR_FORWARD) {
1983             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1984                           s->last_picture.f->data,
1985                           op_pix, op_qpix);
1986             op_pix  = s->hdsp.avg_pixels_tab;
1987             op_qpix = s->qdsp.avg_qpel_pixels_tab;
1988         }
1989         if (s->mv_dir & MV_DIR_BACKWARD) {
1990             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1991                           s->next_picture.f->data,
1992                           op_pix, op_qpix);
1993         }
1994
1995         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1996             int progressive_score, interlaced_score;
1997
1998             s->interlaced_dct = 0;
1999             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2000                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2001                                                      ptr_y + wrap_y * 8,
2002                                                      wrap_y, 8) - 400;
2003
2004             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2005                 progressive_score -= 400;
2006
2007             if (progressive_score > 0) {
2008                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2009                                                         wrap_y * 2, 8) +
2010                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2011                                                         ptr_y + wrap_y,
2012                                                         wrap_y * 2, 8);
2013
2014                 if (progressive_score > interlaced_score) {
2015                     s->interlaced_dct = 1;
2016
2017                     dct_offset = wrap_y;
2018                     wrap_y <<= 1;
2019                     if (s->chroma_format == CHROMA_422)
2020                         wrap_c <<= 1;
2021                 }
2022             }
2023         }
2024
2025         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2026         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2027         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2028                             dest_y + dct_offset, wrap_y);
2029         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2030                             dest_y + dct_offset + 8, wrap_y);
2031
2032         if (s->flags & CODEC_FLAG_GRAY) {
2033             skip_dct[4] = 1;
2034             skip_dct[5] = 1;
2035         } else {
2036             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2037             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2038             if (!s->chroma_y_shift) { /* 422 */
2039                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2040                                     dest_cb + (dct_offset >> 1), wrap_c);
2041                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2042                                     dest_cr + (dct_offset >> 1), wrap_c);
2043             }
2044         }
2045         /* pre quantization */
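         /* If the motion-compensated variance of the MB is already small, skip the
          * DCT/quantization of every 8x8 block whose SAD against the prediction is
          * below 20 * qscale. */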
2046         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2047                 2 * s->qscale * s->qscale) {
2048             // FIXME optimize
2049             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2050                 skip_dct[0] = 1;
2051             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2052                 skip_dct[1] = 1;
2053             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2054                                wrap_y, 8) < 20 * s->qscale)
2055                 skip_dct[2] = 1;
2056             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2057                                wrap_y, 8) < 20 * s->qscale)
2058                 skip_dct[3] = 1;
2059             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2060                 skip_dct[4] = 1;
2061             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2062                 skip_dct[5] = 1;
2063             if (!s->chroma_y_shift) { /* 422 */
2064                 if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2065                                    dest_cb + (dct_offset >> 1),
2066                                    wrap_c, 8) < 20 * s->qscale)
2067                     skip_dct[6] = 1;
2068                 if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2069                                    dest_cr + (dct_offset >> 1),
2070                                    wrap_c, 8) < 20 * s->qscale)
2071                     skip_dct[7] = 1;
2072             }
2073         }
2074     }
2075
2076     if (s->quantizer_noise_shaping) {
2077         if (!skip_dct[0])
2078             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2079         if (!skip_dct[1])
2080             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2081         if (!skip_dct[2])
2082             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2083         if (!skip_dct[3])
2084             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2085         if (!skip_dct[4])
2086             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2087         if (!skip_dct[5])
2088             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2089         if (!s->chroma_y_shift) { /* 422 */
2090             if (!skip_dct[6])
2091                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2092                                   wrap_c);
2093             if (!skip_dct[7])
2094                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2095                                   wrap_c);
2096         }
2097         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2098     }
2099
2100     /* DCT & quantize */
2101     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2102     {
2103         for (i = 0; i < mb_block_count; i++) {
2104             if (!skip_dct[i]) {
2105                 int overflow;
2106                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2107                 // FIXME we could decide to change the quantizer instead of
2108                 // clipping
2109                 // JS: I don't think that would be a good idea; it could lower
2110                 //     quality instead of improving it. Only INTRADC clipping
2111                 //     deserves a change of the quantizer.
2112                 if (overflow)
2113                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2114             } else
2115                 s->block_last_index[i] = -1;
2116         }
2117         if (s->quantizer_noise_shaping) {
2118             for (i = 0; i < mb_block_count; i++) {
2119                 if (!skip_dct[i]) {
2120                     s->block_last_index[i] =
2121                         dct_quantize_refine(s, s->block[i], weight[i],
2122                                             orig[i], i, s->qscale);
2123                 }
2124             }
2125         }
2126
2127         if (s->luma_elim_threshold && !s->mb_intra)
2128             for (i = 0; i < 4; i++)
2129                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2130         if (s->chroma_elim_threshold && !s->mb_intra)
2131             for (i = 4; i < mb_block_count; i++)
2132                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2133
2134         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2135             for (i = 0; i < mb_block_count; i++) {
2136                 if (s->block_last_index[i] == -1)
2137                     s->coded_score[i] = INT_MAX / 256;
2138             }
2139         }
2140     }
2141
2142     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2143         s->block_last_index[4] =
2144         s->block_last_index[5] = 0;
2145         s->block[4][0] =
2146         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2147     }
2148
2149     // FIXME: the non-C quantize code returns an incorrect block_last_index
2150     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2151         for (i = 0; i < mb_block_count; i++) {
2152             int j;
2153             if (s->block_last_index[i] > 0) {
2154                 for (j = 63; j > 0; j--) {
2155                     if (s->block[i][s->intra_scantable.permutated[j]])
2156                         break;
2157                 }
2158                 s->block_last_index[i] = j;
2159             }
2160         }
2161     }
2162
2163     /* huffman encode */
2164     switch(s->codec_id){ //FIXME a function pointer could be slightly faster
2165     case AV_CODEC_ID_MPEG1VIDEO:
2166     case AV_CODEC_ID_MPEG2VIDEO:
2167         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2168             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2169         break;
2170     case AV_CODEC_ID_MPEG4:
2171         if (CONFIG_MPEG4_ENCODER)
2172             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2173         break;
2174     case AV_CODEC_ID_MSMPEG4V2:
2175     case AV_CODEC_ID_MSMPEG4V3:
2176     case AV_CODEC_ID_WMV1:
2177         if (CONFIG_MSMPEG4_ENCODER)
2178             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2179         break;
2180     case AV_CODEC_ID_WMV2:
2181         if (CONFIG_WMV2_ENCODER)
2182             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2183         break;
2184     case AV_CODEC_ID_H261:
2185         if (CONFIG_H261_ENCODER)
2186             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2187         break;
2188     case AV_CODEC_ID_H263:
2189     case AV_CODEC_ID_H263P:
2190     case AV_CODEC_ID_FLV1:
2191     case AV_CODEC_ID_RV10:
2192     case AV_CODEC_ID_RV20:
2193         if (CONFIG_H263_ENCODER)
2194             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2195         break;
2196     case AV_CODEC_ID_MJPEG:
2197         if (CONFIG_MJPEG_ENCODER)
2198             ff_mjpeg_encode_mb(s, s->block);
2199         break;
2200     default:
2201         assert(0);
2202     }
2203 }
2204
2205 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2206 {
2207     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2208     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2209 }
2210
2211 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2212     int i;
2213
2214     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2215
2216     /* mpeg1 */
2217     d->mb_skip_run= s->mb_skip_run;
2218     for(i=0; i<3; i++)
2219         d->last_dc[i] = s->last_dc[i];
2220
2221     /* statistics */
2222     d->mv_bits= s->mv_bits;
2223     d->i_tex_bits= s->i_tex_bits;
2224     d->p_tex_bits= s->p_tex_bits;
2225     d->i_count= s->i_count;
2226     d->f_count= s->f_count;
2227     d->b_count= s->b_count;
2228     d->skip_count= s->skip_count;
2229     d->misc_bits= s->misc_bits;
2230     d->last_bits= 0;
2231
2232     d->mb_skipped= 0;
2233     d->qscale= s->qscale;
2234     d->dquant= s->dquant;
2235
2236     d->esc3_level_length= s->esc3_level_length;
2237 }
2238
2239 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2240     int i;
2241
2242     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2243     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2244
2245     /* mpeg1 */
2246     d->mb_skip_run= s->mb_skip_run;
2247     for(i=0; i<3; i++)
2248         d->last_dc[i] = s->last_dc[i];
2249
2250     /* statistics */
2251     d->mv_bits= s->mv_bits;
2252     d->i_tex_bits= s->i_tex_bits;
2253     d->p_tex_bits= s->p_tex_bits;
2254     d->i_count= s->i_count;
2255     d->f_count= s->f_count;
2256     d->b_count= s->b_count;
2257     d->skip_count= s->skip_count;
2258     d->misc_bits= s->misc_bits;
2259
2260     d->mb_intra= s->mb_intra;
2261     d->mb_skipped= s->mb_skipped;
2262     d->mv_type= s->mv_type;
2263     d->mv_dir= s->mv_dir;
2264     d->pb= s->pb;
2265     if(s->data_partitioning){
2266         d->pb2= s->pb2;
2267         d->tex_pb= s->tex_pb;
2268     }
2269     d->block= s->block;
2270     for(i=0; i<8; i++)
2271         d->block_last_index[i]= s->block_last_index[i];
2272     d->interlaced_dct= s->interlaced_dct;
2273     d->qscale= s->qscale;
2274
2275     d->esc3_level_length= s->esc3_level_length;
2276 }
2277
2278 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2279                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2280                            int *dmin, int *next_block, int motion_x, int motion_y)
2281 {
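     /* Trial-encode the MB into one of two scratch block/bitstream sets and, if its
      * (optionally RD-weighted) cost beats *dmin, keep it as the new best and flip
      * next_block so the other scratch set is used for the next candidate. */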
2282     int score;
2283     uint8_t *dest_backup[3];
2284
2285     copy_context_before_encode(s, backup, type);
2286
2287     s->block= s->blocks[*next_block];
2288     s->pb= pb[*next_block];
2289     if(s->data_partitioning){
2290         s->pb2   = pb2   [*next_block];
2291         s->tex_pb= tex_pb[*next_block];
2292     }
2293
2294     if(*next_block){
2295         memcpy(dest_backup, s->dest, sizeof(s->dest));
2296         s->dest[0] = s->rd_scratchpad;
2297         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2298         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2299         assert(s->linesize >= 32); //FIXME
2300     }
2301
2302     encode_mb(s, motion_x, motion_y);
2303
2304     score= put_bits_count(&s->pb);
2305     if(s->data_partitioning){
2306         score+= put_bits_count(&s->pb2);
2307         score+= put_bits_count(&s->tex_pb);
2308     }
2309
2310     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2311         ff_MPV_decode_mb(s, s->block);
2312
2313         score *= s->lambda2;
2314         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2315     }
2316
2317     if(*next_block){
2318         memcpy(s->dest, dest_backup, sizeof(s->dest));
2319     }
2320
2321     if(score<*dmin){
2322         *dmin= score;
2323         *next_block^=1;
2324
2325         copy_context_after_encode(best, s, type);
2326     }
2327 }
2328
2329 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2330     uint32_t *sq = ff_square_tab + 256;
2331     int acc=0;
2332     int x,y;
2333
2334     if(w==16 && h==16)
2335         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2336     else if(w==8 && h==8)
2337         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2338
2339     for(y=0; y<h; y++){
2340         for(x=0; x<w; x++){
2341             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2342         }
2343     }
2344
2345     assert(acc>=0);
2346
2347     return acc;
2348 }
2349
2350 static int sse_mb(MpegEncContext *s){
2351     int w= 16;
2352     int h= 16;
2353
2354     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2355     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2356
2357     if(w==16 && h==16)
2358       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2359         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2360                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2361                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2362       }else{
2363         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2364                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2365                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2366       }
2367     else
2368         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2369                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2370                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2371 }
2372
2373 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2374     MpegEncContext *s= *(void**)arg;
2375
2376
2377     s->me.pre_pass=1;
2378     s->me.dia_size= s->avctx->pre_dia_size;
2379     s->first_slice_line=1;
2380     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2381         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2382             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2383         }
2384         s->first_slice_line=0;
2385     }
2386
2387     s->me.pre_pass=0;
2388
2389     return 0;
2390 }
2391
2392 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2393     MpegEncContext *s= *(void**)arg;
2394
2395     s->me.dia_size= s->avctx->dia_size;
2396     s->first_slice_line=1;
2397     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2398         s->mb_x=0; //for block init below
2399         ff_init_block_index(s);
2400         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2401             s->block_index[0]+=2;
2402             s->block_index[1]+=2;
2403             s->block_index[2]+=2;
2404             s->block_index[3]+=2;
2405
2406             /* compute motion vector & mb_type and store in context */
2407             if(s->pict_type==AV_PICTURE_TYPE_B)
2408                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2409             else
2410                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2411         }
2412         s->first_slice_line=0;
2413     }
2414     return 0;
2415 }
2416
2417 static int mb_var_thread(AVCodecContext *c, void *arg){
2418     MpegEncContext *s= *(void**)arg;
2419     int mb_x, mb_y;
2420
2421     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2422         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2423             int xx = mb_x * 16;
2424             int yy = mb_y * 16;
2425             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2426             int varc;
2427             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2428
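             /* varc approximates the luma variance of the MB: sum of squares minus
              * mean^2 * 256, scaled down by 256 with rounding and a small bias;
              * mb_mean is the rounded average luma. */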
2429             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2430                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2431
2432             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2433             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2434             s->me.mb_var_sum_temp    += varc;
2435         }
2436     }
2437     return 0;
2438 }
2439
2440 static void write_slice_end(MpegEncContext *s){
2441     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2442         if(s->partitioned_frame){
2443             ff_mpeg4_merge_partitions(s);
2444         }
2445
2446         ff_mpeg4_stuffing(&s->pb);
2447     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2448         ff_mjpeg_encode_stuffing(&s->pb);
2449     }
2450
2451     avpriv_align_put_bits(&s->pb);
2452     flush_put_bits(&s->pb);
2453
2454     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2455         s->misc_bits+= get_bits_diff(s);
2456 }
2457
2458 static void write_mb_info(MpegEncContext *s)
2459 {
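     /* Each H.263 macroblock-info record is 12 bytes: 32-bit bit offset, qscale,
      * GOB number, 16-bit macroblock address and the predicted MV pair; the second
      * MV pair is reserved (4MV not implemented). */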
2460     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2461     int offset = put_bits_count(&s->pb);
2462     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2463     int gobn = s->mb_y / s->gob_index;
2464     int pred_x, pred_y;
2465     if (CONFIG_H263_ENCODER)
2466         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2467     bytestream_put_le32(&ptr, offset);
2468     bytestream_put_byte(&ptr, s->qscale);
2469     bytestream_put_byte(&ptr, gobn);
2470     bytestream_put_le16(&ptr, mba);
2471     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2472     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2473     /* 4MV not implemented */
2474     bytestream_put_byte(&ptr, 0); /* hmv2 */
2475     bytestream_put_byte(&ptr, 0); /* vmv2 */
2476 }
2477
2478 static void update_mb_info(MpegEncContext *s, int startcode)
2479 {
2480     if (!s->mb_info)
2481         return;
2482     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2483         s->mb_info_size += 12;
2484         s->prev_mb_info = s->last_mb_info;
2485     }
2486     if (startcode) {
2487         s->prev_mb_info = put_bits_count(&s->pb)/8;
2488         /* This might have incremented mb_info_size above, and we return without
2489          * actually writing any info into that slot yet. But in that case, this
2490          * will be called again after the start code has been written, and the
2491          * mb info will be written then. */
2492         return;
2493     }
2494
2495     s->last_mb_info = put_bits_count(&s->pb)/8;
2496     if (!s->mb_info_size)
2497         s->mb_info_size += 12;
2498     write_mb_info(s);
2499 }
2500
2501 static int encode_thread(AVCodecContext *c, void *arg){
2502     MpegEncContext *s= *(void**)arg;
2503     int mb_x, mb_y, pdif = 0;
2504     int chr_h= 16>>s->chroma_y_shift;
2505     int i, j;
2506     MpegEncContext best_s, backup_s;
2507     uint8_t bit_buf[2][MAX_MB_BYTES];
2508     uint8_t bit_buf2[2][MAX_MB_BYTES];
2509     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2510     PutBitContext pb[2], pb2[2], tex_pb[2];
2511
2512     for(i=0; i<2; i++){
2513         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2514         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2515         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2516     }
2517
2518     s->last_bits= put_bits_count(&s->pb);
2519     s->mv_bits=0;
2520     s->misc_bits=0;
2521     s->i_tex_bits=0;
2522     s->p_tex_bits=0;
2523     s->i_count=0;
2524     s->f_count=0;
2525     s->b_count=0;
2526     s->skip_count=0;
2527
2528     for(i=0; i<3; i++){
2529         /* init last dc values */
2530         /* note: quant matrix value (8) is implied here */
2531         s->last_dc[i] = 128 << s->intra_dc_precision;
2532
2533         s->current_picture.f->error[i] = 0;
2534     }
2535     s->mb_skip_run = 0;
2536     memset(s->last_mv, 0, sizeof(s->last_mv));
2537
2538     s->last_mv_dir = 0;
2539
2540     switch(s->codec_id){
2541     case AV_CODEC_ID_H263:
2542     case AV_CODEC_ID_H263P:
2543     case AV_CODEC_ID_FLV1:
2544         if (CONFIG_H263_ENCODER)
2545             s->gob_index = ff_h263_get_gob_height(s);
2546         break;
2547     case AV_CODEC_ID_MPEG4:
2548         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2549             ff_mpeg4_init_partitions(s);
2550         break;
2551     }
2552
2553     s->resync_mb_x=0;
2554     s->resync_mb_y=0;
2555     s->first_slice_line = 1;
2556     s->ptr_lastgob = s->pb.buf;
2557     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2558         s->mb_x=0;
2559         s->mb_y= mb_y;
2560
2561         ff_set_qscale(s, s->qscale);
2562         ff_init_block_index(s);
2563
2564         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2565             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2566             int mb_type= s->mb_type[xy];
2567 //            int d;
2568             int dmin= INT_MAX;
2569             int dir;
2570
2571             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2572                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2573                 return -1;
2574             }
2575             if(s->data_partitioning){
2576                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2577                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2578                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2579                     return -1;
2580                 }
2581             }
2582
2583             s->mb_x = mb_x;
2584             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2585             ff_update_block_index(s);
2586
2587             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2588                 ff_h261_reorder_mb_index(s);
2589                 xy= s->mb_y*s->mb_stride + s->mb_x;
2590                 mb_type= s->mb_type[xy];
2591             }
2592
2593             /* write gob / video packet header  */
2594             if(s->rtp_mode){
2595                 int current_packet_size, is_gob_start;
2596
2597                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2598
2599                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2600
2601                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2602
2603                 switch(s->codec_id){
2604                 case AV_CODEC_ID_H263:
2605                 case AV_CODEC_ID_H263P:
2606                     if(!s->h263_slice_structured)
2607                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2608                     break;
2609                 case AV_CODEC_ID_MPEG2VIDEO:
2610                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
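                     /* fall through: the mb_skip_run check below applies to MPEG-2 as well */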
2611                 case AV_CODEC_ID_MPEG1VIDEO:
2612                     if(s->mb_skip_run) is_gob_start=0;
2613                     break;
2614                 }
2615
2616                 if(is_gob_start){
2617                     if(s->start_mb_y != mb_y || mb_x!=0){
2618                         write_slice_end(s);
2619
2620                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2621                             ff_mpeg4_init_partitions(s);
2622                         }
2623                     }
2624
2625                     assert((put_bits_count(&s->pb)&7) == 0);
2626                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2627
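                     /* Error resilience testing: with error_rate set, roughly one slice in
                      * (100 / error_rate) is dropped by rewinding the bitstream to the last
                      * GOB start. */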
2628                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2629                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2630                         int d = 100 / s->error_rate;
2631                         if(r % d == 0){
2632                             current_packet_size=0;
2633                             s->pb.buf_ptr= s->ptr_lastgob;
2634                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2635                         }
2636                     }
2637
2638                     if (s->avctx->rtp_callback){
2639                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2640                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2641                     }
2642                     update_mb_info(s, 1);
2643
2644                     switch(s->codec_id){
2645                     case AV_CODEC_ID_MPEG4:
2646                         if (CONFIG_MPEG4_ENCODER) {
2647                             ff_mpeg4_encode_video_packet_header(s);
2648                             ff_mpeg4_clean_buffers(s);
2649                         }
2650                     break;
2651                     case AV_CODEC_ID_MPEG1VIDEO:
2652                     case AV_CODEC_ID_MPEG2VIDEO:
2653                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2654                             ff_mpeg1_encode_slice_header(s);
2655                             ff_mpeg1_clean_buffers(s);
2656                         }
2657                     break;
2658                     case AV_CODEC_ID_H263:
2659                     case AV_CODEC_ID_H263P:
2660                         if (CONFIG_H263_ENCODER)
2661                             ff_h263_encode_gob_header(s, mb_y);
2662                     break;
2663                     }
2664
2665                     if(s->flags&CODEC_FLAG_PASS1){
2666                         int bits= put_bits_count(&s->pb);
2667                         s->misc_bits+= bits - s->last_bits;
2668                         s->last_bits= bits;
2669                     }
2670
2671                     s->ptr_lastgob += current_packet_size;
2672                     s->first_slice_line=1;
2673                     s->resync_mb_x=mb_x;
2674                     s->resync_mb_y=mb_y;
2675                 }
2676             }
2677
2678             if(  (s->resync_mb_x   == s->mb_x)
2679                && s->resync_mb_y+1 == s->mb_y){
2680                 s->first_slice_line=0;
2681             }
2682
2683             s->mb_skipped=0;
2684             s->dquant=0; //only for QP_RD
2685
2686             update_mb_info(s, 0);
2687
2688             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2689                 int next_block=0;
2690                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2691
2692                 copy_context_before_encode(&backup_s, s, -1);
2693                 backup_s.pb= s->pb;
2694                 best_s.data_partitioning= s->data_partitioning;
2695                 best_s.partitioned_frame= s->partitioned_frame;
2696                 if(s->data_partitioning){
2697                     backup_s.pb2= s->pb2;
2698                     backup_s.tex_pb= s->tex_pb;
2699                 }
2700
2701                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2702                     s->mv_dir = MV_DIR_FORWARD;
2703                     s->mv_type = MV_TYPE_16X16;
2704                     s->mb_intra= 0;
2705                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2706                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2707                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2708                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2709                 }
2710                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2711                     s->mv_dir = MV_DIR_FORWARD;
2712                     s->mv_type = MV_TYPE_FIELD;
2713                     s->mb_intra= 0;
2714                     for(i=0; i<2; i++){
2715                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2716                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2717                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2718                     }
2719                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2720                                  &dmin, &next_block, 0, 0);
2721                 }
2722                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2723                     s->mv_dir = MV_DIR_FORWARD;
2724                     s->mv_type = MV_TYPE_16X16;
2725                     s->mb_intra= 0;
2726                     s->mv[0][0][0] = 0;
2727                     s->mv[0][0][1] = 0;
2728                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2729                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2730                 }
2731                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2732                     s->mv_dir = MV_DIR_FORWARD;
2733                     s->mv_type = MV_TYPE_8X8;
2734                     s->mb_intra= 0;
2735                     for(i=0; i<4; i++){
2736                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2737                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2738                     }
2739                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2740                                  &dmin, &next_block, 0, 0);
2741                 }
2742                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2743                     s->mv_dir = MV_DIR_FORWARD;
2744                     s->mv_type = MV_TYPE_16X16;
2745                     s->mb_intra= 0;
2746                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2747                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2748                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2749                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2750                 }
2751                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2752                     s->mv_dir = MV_DIR_BACKWARD;
2753                     s->mv_type = MV_TYPE_16X16;
2754                     s->mb_intra= 0;
2755                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2756                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2757                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2758                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2759                 }
2760                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2761                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2762                     s->mv_type = MV_TYPE_16X16;
2763                     s->mb_intra= 0;
2764                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2765                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2766                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2767                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2768                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2769                                  &dmin, &next_block, 0, 0);
2770                 }
2771                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2772                     s->mv_dir = MV_DIR_FORWARD;
2773                     s->mv_type = MV_TYPE_FIELD;
2774                     s->mb_intra= 0;
2775                     for(i=0; i<2; i++){
2776                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2777                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2778                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2779                     }
2780                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2781                                  &dmin, &next_block, 0, 0);
2782                 }
2783                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2784                     s->mv_dir = MV_DIR_BACKWARD;
2785                     s->mv_type = MV_TYPE_FIELD;
2786                     s->mb_intra= 0;
2787                     for(i=0; i<2; i++){
2788                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2789                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2790                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2791                     }
2792                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2793                                  &dmin, &next_block, 0, 0);
2794                 }
2795                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2796                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2797                     s->mv_type = MV_TYPE_FIELD;
2798                     s->mb_intra= 0;
2799                     for(dir=0; dir<2; dir++){
2800                         for(i=0; i<2; i++){
2801                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2802                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2803                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2804                         }
2805                     }
2806                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2807                                  &dmin, &next_block, 0, 0);
2808                 }
2809                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2810                     s->mv_dir = 0;
2811                     s->mv_type = MV_TYPE_16X16;
2812                     s->mb_intra= 1;
2813                     s->mv[0][0][0] = 0;
2814                     s->mv[0][0][1] = 0;
2815                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2816                                  &dmin, &next_block, 0, 0);
2817                     if(s->h263_pred || s->h263_aic){
2818                         if(best_s.mb_intra)
2819                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2820                         else
2821                             ff_clean_intra_table_entries(s); //old mode?
2822                     }
2823                 }
2824
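                /* QP_RD: with the best mode fixed, re-encode the MB with the
                 * allowed dquant steps (+-1/+-2; only +-2 for B-frames) around
                 * the previous qscale and keep whichever qscale scores best.
                 * DC/AC prediction state is saved and restored so a rejected
                 * qscale does not corrupt the predictors of later MBs. */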
2825                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2826                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2827                         const int last_qp= backup_s.qscale;
2828                         int qpi, qp, dc[6];
2829                         int16_t ac[6][16];
2830                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2831                         static const int dquant_tab[4]={-1,1,-2,2};
2832
2833                         assert(backup_s.dquant == 0);
2834
2835                         //FIXME intra
2836                         s->mv_dir= best_s.mv_dir;
2837                         s->mv_type = MV_TYPE_16X16;
2838                         s->mb_intra= best_s.mb_intra;
2839                         s->mv[0][0][0] = best_s.mv[0][0][0];
2840                         s->mv[0][0][1] = best_s.mv[0][0][1];
2841                         s->mv[1][0][0] = best_s.mv[1][0][0];
2842                         s->mv[1][0][1] = best_s.mv[1][0][1];
2843
2844                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2845                         for(; qpi<4; qpi++){
2846                             int dquant= dquant_tab[qpi];
2847                             qp= last_qp + dquant;
2848                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2849                                 continue;
2850                             backup_s.dquant= dquant;
2851                             if(s->mb_intra && s->dc_val[0]){
2852                                 for(i=0; i<6; i++){
2853                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2854                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2855                                 }
2856                             }
2857
2858                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2859                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2860                             if(best_s.qscale != qp){
2861                                 if(s->mb_intra && s->dc_val[0]){
2862                                     for(i=0; i<6; i++){
2863                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2864                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2865                                     }
2866                                 }
2867                             }
2868                         }
2869                     }
2870                 }
2871                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2872                     int mx= s->b_direct_mv_table[xy][0];
2873                     int my= s->b_direct_mv_table[xy][1];
2874
2875                     backup_s.dquant = 0;
2876                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2877                     s->mb_intra= 0;
2878                     ff_mpeg4_set_direct_mv(s, mx, my);
2879                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2880                                  &dmin, &next_block, mx, my);
2881                 }
2882                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2883                     backup_s.dquant = 0;
2884                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2885                     s->mb_intra= 0;
2886                     ff_mpeg4_set_direct_mv(s, 0, 0);
2887                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2888                                  &dmin, &next_block, 0, 0);
2889                 }
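                /* SKIP_RD: if the best inter mode still codes coefficients,
                 * also try the same motion with residual coding skipped
                 * (skipdct=1) and let encode_mb_hq() keep the cheaper of the two. */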
2890                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2891                     int coded=0;
2892                     for(i=0; i<6; i++)
2893                         coded |= s->block_last_index[i];
2894                     if(coded){
2895                         int mx,my;
2896                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2897                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2898                             mx=my=0; //FIXME find the one we actually used
2899                             ff_mpeg4_set_direct_mv(s, mx, my);
2900                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2901                             mx= s->mv[1][0][0];
2902                             my= s->mv[1][0][1];
2903                         }else{
2904                             mx= s->mv[0][0][0];
2905                             my= s->mv[0][0][1];
2906                         }
2907
2908                         s->mv_dir= best_s.mv_dir;
2909                         s->mv_type = best_s.mv_type;
2910                         s->mb_intra= 0;
2911 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2912                         s->mv[0][0][1] = best_s.mv[0][0][1];
2913                         s->mv[1][0][0] = best_s.mv[1][0][0];
2914                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2915                         backup_s.dquant= 0;
2916                         s->skipdct=1;
2917                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2918                                         &dmin, &next_block, mx, my);
2919                         s->skipdct=0;
2920                     }
2921                 }
2922
2923                 s->current_picture.qscale_table[xy] = best_s.qscale;
2924
2925                 copy_context_after_encode(s, &best_s, -1);
2926
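                /* The winning candidate's bits live in the scratch buffer
                 * bit_buf[next_block^1]; append them to the real bitstream
                 * (and to pb2/tex_pb when data partitioning is enabled) and
                 * restore the backed-up PutBitContexts. */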
2927                 pb_bits_count= put_bits_count(&s->pb);
2928                 flush_put_bits(&s->pb);
2929                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2930                 s->pb= backup_s.pb;
2931
2932                 if(s->data_partitioning){
2933                     pb2_bits_count= put_bits_count(&s->pb2);
2934                     flush_put_bits(&s->pb2);
2935                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2936                     s->pb2= backup_s.pb2;
2937
2938                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2939                     flush_put_bits(&s->tex_pb);
2940                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2941                     s->tex_pb= backup_s.tex_pb;
2942                 }
2943                 s->last_bits= put_bits_count(&s->pb);
2944
2945                 if (CONFIG_H263_ENCODER &&
2946                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2947                     ff_h263_update_motion_val(s);
2948
2949                 if(next_block==0){ //FIXME 16 vs linesize16
2950                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2951                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2952                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2953                 }
2954
2955                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2956                     ff_MPV_decode_mb(s, s->block);
2957             } else {
2958                 int motion_x = 0, motion_y = 0;
2959                 s->mv_type=MV_TYPE_16X16;
2960                 // only one MB-Type possible
2961
2962                 switch(mb_type){
2963                 case CANDIDATE_MB_TYPE_INTRA:
2964                     s->mv_dir = 0;
2965                     s->mb_intra= 1;
2966                     motion_x= s->mv[0][0][0] = 0;
2967                     motion_y= s->mv[0][0][1] = 0;
2968                     break;
2969                 case CANDIDATE_MB_TYPE_INTER:
2970                     s->mv_dir = MV_DIR_FORWARD;
2971                     s->mb_intra= 0;
2972                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2973                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2974                     break;
2975                 case CANDIDATE_MB_TYPE_INTER_I:
2976                     s->mv_dir = MV_DIR_FORWARD;
2977                     s->mv_type = MV_TYPE_FIELD;
2978                     s->mb_intra= 0;
2979                     for(i=0; i<2; i++){
2980                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2981                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2982                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2983                     }
2984                     break;
2985                 case CANDIDATE_MB_TYPE_INTER4V:
2986                     s->mv_dir = MV_DIR_FORWARD;
2987                     s->mv_type = MV_TYPE_8X8;
2988                     s->mb_intra= 0;
2989                     for(i=0; i<4; i++){
2990                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2991                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2992                     }
2993                     break;
2994                 case CANDIDATE_MB_TYPE_DIRECT:
2995                     if (CONFIG_MPEG4_ENCODER) {
2996                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2997                         s->mb_intra= 0;
2998                         motion_x=s->b_direct_mv_table[xy][0];
2999                         motion_y=s->b_direct_mv_table[xy][1];
3000                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3001                     }
3002                     break;
3003                 case CANDIDATE_MB_TYPE_DIRECT0:
3004                     if (CONFIG_MPEG4_ENCODER) {
3005                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3006                         s->mb_intra= 0;
3007                         ff_mpeg4_set_direct_mv(s, 0, 0);
3008                     }
3009                     break;
3010                 case CANDIDATE_MB_TYPE_BIDIR:
3011                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3012                     s->mb_intra= 0;
3013                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3014                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3015                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3016                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3017                     break;
3018                 case CANDIDATE_MB_TYPE_BACKWARD:
3019                     s->mv_dir = MV_DIR_BACKWARD;
3020                     s->mb_intra= 0;
3021                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3022                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3023                     break;
3024                 case CANDIDATE_MB_TYPE_FORWARD:
3025                     s->mv_dir = MV_DIR_FORWARD;
3026                     s->mb_intra= 0;
3027                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3028                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3029                     break;
3030                 case CANDIDATE_MB_TYPE_FORWARD_I:
3031                     s->mv_dir = MV_DIR_FORWARD;
3032                     s->mv_type = MV_TYPE_FIELD;
3033                     s->mb_intra= 0;
3034                     for(i=0; i<2; i++){
3035                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3036                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3037                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3038                     }
3039                     break;
3040                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3041                     s->mv_dir = MV_DIR_BACKWARD;
3042                     s->mv_type = MV_TYPE_FIELD;
3043                     s->mb_intra= 0;
3044                     for(i=0; i<2; i++){
3045                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3046                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3047                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3048                     }
3049                     break;
3050                 case CANDIDATE_MB_TYPE_BIDIR_I:
3051                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3052                     s->mv_type = MV_TYPE_FIELD;
3053                     s->mb_intra= 0;
3054                     for(dir=0; dir<2; dir++){
3055                         for(i=0; i<2; i++){
3056                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3057                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3058                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3059                         }
3060                     }
3061                     break;
3062                 default:
3063                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3064                 }
3065
3066                 encode_mb(s, motion_x, motion_y);
3067
3068                 // RAL: Update last macroblock type
3069                 s->last_mv_dir = s->mv_dir;
3070
3071                 if (CONFIG_H263_ENCODER &&
3072                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3073                     ff_h263_update_motion_val(s);
3074
3075                 ff_MPV_decode_mb(s, s->block);
3076             }
3077
3078             /* clean the MV table in I/P/S frames for direct mode in B-frames */
3079             if(s->mb_intra /* && I,P,S_TYPE */){
3080                 s->p_mv_table[xy][0]=0;
3081                 s->p_mv_table[xy][1]=0;
3082             }
3083
3084             if(s->flags&CODEC_FLAG_PSNR){
3085                 int w= 16;
3086                 int h= 16;
3087
3088                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3089                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3090
3091                 s->current_picture.f->error[0] += sse(
3092                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3093                     s->dest[0], w, h, s->linesize);
3094                 s->current_picture.f->error[1] += sse(
3095                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3096                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3097                 s->current_picture.f->error[2] += sse(
3098                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3099                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3100             }
3101             if(s->loop_filter){
3102                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3103                     ff_h263_loop_filter(s);
3104             }
3105             av_dlog(s->avctx, "MB %d %d bits\n",
3106                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3107         }
3108     }
3109
3110     // Not beautiful, but this must be written before flushing, so it has to be here.
3111     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3112         ff_msmpeg4_encode_ext_header(s);
3113
3114     write_slice_end(s);
3115
3116     /* Send the last GOB if RTP */
3117     if (s->avctx->rtp_callback) {
3118         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3119         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3120         /* Call the RTP callback to send the last GOB */
3121         emms_c();
3122         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3123     }
3124
3125     return 0;
3126 }
3127
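/* With slice threading each slice context accumulates its own statistics;
 * MERGE() adds a field of the source context into the destination and zeroes
 * the source so nothing is counted twice. */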
3128 #define MERGE(field) dst->field += src->field; src->field=0
3129 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3130     MERGE(me.scene_change_score);
3131     MERGE(me.mc_mb_var_sum_temp);
3132     MERGE(me.mb_var_sum_temp);
3133 }
3134
3135 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3136     int i;
3137
3138     MERGE(dct_count[0]); //note: the other DCT vars are not part of the context
3139     MERGE(dct_count[1]);
3140     MERGE(mv_bits);
3141     MERGE(i_tex_bits);
3142     MERGE(p_tex_bits);
3143     MERGE(i_count);
3144     MERGE(f_count);
3145     MERGE(b_count);
3146     MERGE(skip_count);
3147     MERGE(misc_bits);
3148     MERGE(er.error_count);
3149     MERGE(padding_bug_score);
3150     MERGE(current_picture.f->error[0]);
3151     MERGE(current_picture.f->error[1]);
3152     MERGE(current_picture.f->error[2]);
3153
3154     if(dst->avctx->noise_reduction){
3155         for(i=0; i<64; i++){
3156             MERGE(dct_error_sum[0][i]);
3157             MERGE(dct_error_sum[1][i]);
3158         }
3159     }
3160
3161     assert(put_bits_count(&src->pb) % 8 ==0);
3162     assert(put_bits_count(&dst->pb) % 8 ==0);
3163     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3164     flush_put_bits(&dst->pb);
3165 }
3166
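/* Pick the frame-level quantizer: either the value the rate control forced
 * for this frame (next_lambda) or a fresh estimate from
 * ff_rate_estimate_qscale(); with adaptive quantization the per-MB qscales
 * are additionally cleaned up for codecs that restrict dquant. */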
3167 static int estimate_qp(MpegEncContext *s, int dry_run){
3168     if (s->next_lambda){
3169         s->current_picture_ptr->f->quality =
3170         s->current_picture.f->quality = s->next_lambda;
3171         if(!dry_run) s->next_lambda= 0;
3172     } else if (!s->fixed_qscale) {
3173         s->current_picture_ptr->f->quality =
3174         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3175         if (s->current_picture.f->quality < 0)
3176             return -1;
3177     }
3178
3179     if(s->adaptive_quant){
3180         switch(s->codec_id){
3181         case AV_CODEC_ID_MPEG4:
3182             if (CONFIG_MPEG4_ENCODER)
3183                 ff_clean_mpeg4_qscales(s);
3184             break;
3185         case AV_CODEC_ID_H263:
3186         case AV_CODEC_ID_H263P:
3187         case AV_CODEC_ID_FLV1:
3188             if (CONFIG_H263_ENCODER)
3189                 ff_clean_h263_qscales(s);
3190             break;
3191         default:
3192             ff_init_qscale_tab(s);
3193         }
3194
3195         s->lambda= s->lambda_table[0];
3196         //FIXME broken
3197     }else
3198         s->lambda = s->current_picture.f->quality;
3199     update_qscale(s);
3200     return 0;
3201 }
3202
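/* Frame distances in time_base units: pp_time is the distance between the
 * current P/I frame and the previous one, pb_time the distance between a
 * B-frame and the previous P/I frame. Example (display order I0 B1 B2 P3,
 * coded I0 P3 B1 B2): coding P3 gives pp_time = 3, coding B1 then gives
 * pb_time = 3 - (3 - 1) = 1. */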
3203 /* must be called before writing the header */
3204 static void set_frame_distances(MpegEncContext * s){
3205     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3206     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3207
3208     if(s->pict_type==AV_PICTURE_TYPE_B){
3209         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3210         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3211     }else{
3212         s->pp_time= s->time - s->last_non_b_time;
3213         s->last_non_b_time= s->time;
3214         assert(s->picture_number==0 || s->pp_time > 0);
3215     }
3216 }
3217
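/* Top-level per-frame encoding: run motion estimation on all slice contexts,
 * merge their statistics, possibly promote a P-frame to an I-frame on scene
 * change, pick f_code/b_code and the frame quantizer, write the picture
 * header, then run encode_thread() on every slice context and merge the
 * results. */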
3218 static int encode_picture(MpegEncContext *s, int picture_number)
3219 {
3220     int i, ret;
3221     int bits;
3222     int context_count = s->slice_context_count;
3223
3224     s->picture_number = picture_number;
3225
3226     /* Reset the average MB variance */
3227     s->me.mb_var_sum_temp    =
3228     s->me.mc_mb_var_sum_temp = 0;
3229
3230     /* we need to initialize some time vars before we can encode B-frames */
3231     // RAL: Condition added for MPEG1VIDEO
3232     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3233         set_frame_distances(s);
3234     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3235         ff_set_mpeg4_time(s);
3236
3237     s->me.scene_change_score=0;
3238
3239 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3240
3241     if(s->pict_type==AV_PICTURE_TYPE_I){
3242         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3243         else                        s->no_rounding=0;
3244     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3245         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3246             s->no_rounding ^= 1;
3247     }
3248
3249     if(s->flags & CODEC_FLAG_PASS2){
3250         if (estimate_qp(s,1) < 0)
3251             return -1;
3252         ff_get_2pass_fcode(s);
3253     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3254         if(s->pict_type==AV_PICTURE_TYPE_B)
3255             s->lambda= s->last_lambda_for[s->pict_type];
3256         else
3257             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3258         update_qscale(s);
3259     }
3260
3261     s->mb_intra=0; //for the rate distortion & bit compare functions
3262     for(i=1; i<context_count; i++){
3263         ret = ff_update_duplicate_context(s->thread_context[i], s);
3264         if (ret < 0)
3265             return ret;
3266     }
3267
3268     if(ff_init_me(s)<0)
3269         return -1;
3270
3271     /* Estimate motion for every MB */
3272     if(s->pict_type != AV_PICTURE_TYPE_I){
3273         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3274         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3275         if (s->pict_type != AV_PICTURE_TYPE_B) {
3276             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3277                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3278             }
3279         }
3280
3281         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3282     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3283         /* I-Frame */
3284         for(i=0; i<s->mb_stride*s->mb_height; i++)
3285             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3286
3287         if(!s->fixed_qscale){
3288             /* finding spatial complexity for I-frame rate control */
3289             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3290         }
3291     }
3292     for(i=1; i<context_count; i++){
3293         merge_context_after_me(s, s->thread_context[i]);
3294     }
3295     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3296     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3297     emms_c();
3298
3299     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3300         s->pict_type= AV_PICTURE_TYPE_I;
3301         for(i=0; i<s->mb_stride*s->mb_height; i++)
3302             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3303         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3304                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3305     }
3306
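    /* Pick the smallest f_code/b_code that can represent the estimated
     * vectors, then clamp or discard (ff_fix_long_*_mvs) the vectors that
     * still do not fit into that range. */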
3307     if(!s->umvplus){
3308         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3309             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3310
3311             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3312                 int a,b;
3313                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3314                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3315                 s->f_code= FFMAX3(s->f_code, a, b);
3316             }
3317
3318             ff_fix_long_p_mvs(s);
3319             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3320             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3321                 int j;
3322                 for(i=0; i<2; i++){
3323                     for(j=0; j<2; j++)
3324                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3325                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3326                 }
3327             }
3328         }
3329
3330         if(s->pict_type==AV_PICTURE_TYPE_B){
3331             int a, b;
3332
3333             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3334             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3335             s->f_code = FFMAX(a, b);
3336
3337             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3338             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3339             s->b_code = FFMAX(a, b);
3340
3341             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3342             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3343             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3344             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3345             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3346                 int dir, j;
3347                 for(dir=0; dir<2; dir++){
3348                     for(i=0; i<2; i++){
3349                         for(j=0; j<2; j++){
3350                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3351                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3352                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3353                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3354                         }
3355                     }
3356                 }
3357             }
3358         }
3359     }
3360
3361     if (estimate_qp(s, 0) < 0)
3362         return -1;
3363
3364     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3365         s->qscale= 3; //reduce clipping problems
3366
3367     if (s->out_format == FMT_MJPEG) {
3368         /* for mjpeg, we do include qscale in the matrix */
3369         for(i=1;i<64;i++){
3370             int j = s->idsp.idct_permutation[i];
3371
3372             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3373         }
3374         s->y_dc_scale_table=
3375         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3376         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3377         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3378                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3379         s->qscale= 8;
3380     }
3381
3382     //FIXME var duplication
3383     s->current_picture_ptr->f->key_frame =
3384     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3385     s->current_picture_ptr->f->pict_type =
3386     s->current_picture.f->pict_type = s->pict_type;
3387
3388     if (s->current_picture.f->key_frame)
3389         s->picture_in_gop_number=0;
3390
3391     s->last_bits= put_bits_count(&s->pb);
3392     switch(s->out_format) {
3393     case FMT_MJPEG:
3394         if (CONFIG_MJPEG_ENCODER)
3395             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3396                                            s->intra_matrix);
3397         break;
3398     case FMT_H261:
3399         if (CONFIG_H261_ENCODER)
3400             ff_h261_encode_picture_header(s, picture_number);
3401         break;
3402     case FMT_H263:
3403         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3404             ff_wmv2_encode_picture_header(s, picture_number);
3405         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3406             ff_msmpeg4_encode_picture_header(s, picture_number);
3407         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3408             ff_mpeg4_encode_picture_header(s, picture_number);
3409         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3410             ff_rv10_encode_picture_header(s, picture_number);
3411         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3412             ff_rv20_encode_picture_header(s, picture_number);
3413         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3414             ff_flv_encode_picture_header(s, picture_number);
3415         else if (CONFIG_H263_ENCODER)
3416             ff_h263_encode_picture_header(s, picture_number);
3417         break;
3418     case FMT_MPEG1:
3419         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3420             ff_mpeg1_encode_picture_header(s, picture_number);
3421         break;
3422     default:
3423         assert(0);
3424     }
3425     bits= put_bits_count(&s->pb);
3426     s->header_bits= bits - s->last_bits;
3427
3428     for(i=1; i<context_count; i++){
3429         update_duplicate_context_after_me(s->thread_context[i], s);
3430     }
3431     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3432     for(i=1; i<context_count; i++){
3433         merge_context_after_encode(s, s->thread_context[i]);
3434     }
3435     emms_c();
3436     return 0;
3437 }
3438
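/* Noise reduction hook used by the quantizers: dct_error_sum accumulates the
 * magnitude of every DCT coefficient, and dct_offset (computed from these
 * sums outside this function) is subtracted from each coefficient, pulling
 * values that behave like noise towards zero before quantization. */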
3439 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3440     const int intra= s->mb_intra;
3441     int i;
3442
3443     s->dct_count[intra]++;
3444
3445     for(i=0; i<64; i++){
3446         int level= block[i];
3447
3448         if(level){
3449             if(level>0){
3450                 s->dct_error_sum[intra][i] += level;
3451                 level -= s->dct_offset[intra][i];
3452                 if(level<0) level=0;
3453             }else{
3454                 s->dct_error_sum[intra][i] -= level;
3455                 level += s->dct_offset[intra][i];
3456                 if(level>0) level=0;
3457             }
3458             block[i]= level;
3459         }
3460     }
3461 }
3462
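/* Rate-distortion (trellis) quantization: for each scanned coefficient up to
 * the last non-zero one, up to two candidate levels are kept (the rounded
 * value and the value one step closer to zero). A Viterbi-style search over
 * (run, level) pairs then minimizes distortion + lambda * bits, with the
 * survivor[] list pruning start positions that can no longer win. */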
3463 static int dct_quantize_trellis_c(MpegEncContext *s,
3464                                   int16_t *block, int n,
3465                                   int qscale, int *overflow){
3466     const int *qmat;
3467     const uint8_t *scantable= s->intra_scantable.scantable;
3468     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3469     int max=0;
3470     unsigned int threshold1, threshold2;
3471     int bias=0;
3472     int run_tab[65];
3473     int level_tab[65];
3474     int score_tab[65];
3475     int survivor[65];
3476     int survivor_count;
3477     int last_run=0;
3478     int last_level=0;
3479     int last_score= 0;
3480     int last_i;
3481     int coeff[2][64];
3482     int coeff_count[64];
3483     int qmul, qadd, start_i, last_non_zero, i, dc;
3484     const int esc_length= s->ac_esc_length;
3485     uint8_t * length;
3486     uint8_t * last_length;
3487     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3488
3489     s->fdsp.fdct(block);
3490
3491     if(s->dct_error_sum)
3492         s->denoise_dct(s, block);
3493     qmul= qscale*16;
3494     qadd= ((qscale-1)|1)*8;
3495
3496     if (s->mb_intra) {
3497         int q;
3498         if (!s->h263_aic) {
3499             if (n < 4)
3500                 q = s->y_dc_scale;
3501             else
3502                 q = s->c_dc_scale;
3503             q = q << 3;
3504         } else{
3505             /* For AIC we skip quant/dequant of INTRADC */
3506             q = 1 << 3;
3507             qadd=0;
3508         }
3509
3510         /* note: block[0] is assumed to be positive */
3511         block[0] = (block[0] + (q >> 1)) / q;
3512         start_i = 1;
3513         last_non_zero = 0;
3514         qmat = s->q_intra_matrix[qscale];
3515         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3516             bias= 1<<(QMAT_SHIFT-1);
3517         length     = s->intra_ac_vlc_length;
3518         last_length= s->intra_ac_vlc_last_length;
3519     } else {
3520         start_i = 0;
3521         last_non_zero = -1;
3522         qmat = s->q_inter_matrix[qscale];
3523         length     = s->inter_ac_vlc_length;
3524         last_length= s->inter_ac_vlc_last_length;
3525     }
3526     last_i= start_i;
3527
3528     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3529     threshold2= (threshold1<<1);
3530
3531     for(i=63; i>=start_i; i--) {
3532         const int j = scantable[i];
3533         int level = block[j] * qmat[j];
3534
3535         if(((unsigned)(level+threshold1))>threshold2){
3536             last_non_zero = i;
3537             break;
3538         }
3539     }
3540
3541     for(i=start_i; i<=last_non_zero; i++) {
3542         const int j = scantable[i];
3543         int level = block[j] * qmat[j];
3544
3545 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3546 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3547         if(((unsigned)(level+threshold1))>threshold2){
3548             if(level>0){
3549                 level= (bias + level)>>QMAT_SHIFT;
3550                 coeff[0][i]= level;
3551                 coeff[1][i]= level-1;
3552 //                coeff[2][k]= level-2;
3553             }else{
3554                 level= (bias - level)>>QMAT_SHIFT;
3555                 coeff[0][i]= -level;
3556                 coeff[1][i]= -level+1;
3557 //                coeff[2][k]= -level+2;
3558             }
3559             coeff_count[i]= FFMIN(level, 2);
3560             assert(coeff_count[i]);
3561             max |=level;
3562         }else{
3563             coeff[0][i]= (level>>31)|1;
3564             coeff_count[i]= 1;
3565         }
3566     }
3567
3568     *overflow= s->max_qcoeff < max; //overflow might have happened
3569
3570     if(last_non_zero < start_i){
3571         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3572         return last_non_zero;
3573     }
3574
3575     score_tab[start_i]= 0;
3576     survivor[0]= start_i;
3577     survivor_count= 1;
3578
3579     for(i=start_i; i<=last_non_zero; i++){
3580         int level_index, j, zero_distortion;
3581         int dct_coeff= FFABS(block[ scantable[i] ]);
3582         int best_score=256*256*256*120;
3583
3584         if (s->fdsp.fdct == ff_fdct_ifast)
3585             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3586         zero_distortion= dct_coeff*dct_coeff;
3587
3588         for(level_index=0; level_index < coeff_count[i]; level_index++){
3589             int distortion;
3590             int level= coeff[level_index][i];
3591             const int alevel= FFABS(level);
3592             int unquant_coeff;
3593
3594             assert(level);
3595
3596             if(s->out_format == FMT_H263){
3597                 unquant_coeff= alevel*qmul + qadd;
3598             }else{ //MPEG1
3599                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3600                 if(s->mb_intra){
3601                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3602                         unquant_coeff =   (unquant_coeff - 1) | 1;
3603                 }else{
3604                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3605                         unquant_coeff =   (unquant_coeff - 1) | 1;
3606                 }
3607                 unquant_coeff<<= 3;
3608             }
3609
3610             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3611             level+=64;
3612             if((level&(~127)) == 0){
3613                 for(j=survivor_count-1; j>=0; j--){
3614                     int run= i - survivor[j];
3615                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3616                     score += score_tab[i-run];
3617
3618                     if(score < best_score){
3619                         best_score= score;
3620                         run_tab[i+1]= run;
3621                         level_tab[i+1]= level-64;
3622                     }
3623                 }
3624
3625                 if(s->out_format == FMT_H263){
3626                     for(j=survivor_count-1; j>=0; j--){
3627                         int run= i - survivor[j];
3628                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3629                         score += score_tab[i-run];
3630                         if(score < last_score){
3631                             last_score= score;
3632                             last_run= run;
3633                             last_level= level-64;
3634                             last_i= i+1;
3635                         }
3636                     }
3637                 }
3638             }else{
3639                 distortion += esc_length*lambda;
3640                 for(j=survivor_count-1; j>=0; j--){
3641                     int run= i - survivor[j];
3642                     int score= distortion + score_tab[i-run];
3643
3644                     if(score < best_score){
3645                         best_score= score;
3646                         run_tab[i+1]= run;
3647                         level_tab[i+1]= level-64;
3648                     }
3649                 }
3650
3651                 if(s->out_format == FMT_H263){
3652                   for(j=survivor_count-1; j>=0; j--){
3653                         int run= i - survivor[j];
3654                         int score= distortion + score_tab[i-run];
3655                         if(score < last_score){
3656                             last_score= score;
3657                             last_run= run;
3658                             last_level= level-64;
3659                             last_i= i+1;
3660                         }
3661                     }
3662                 }
3663             }
3664         }
3665
3666         score_tab[i+1]= best_score;
3667
3668         //Note: there is a VLC code in MPEG-4 which is 1 bit shorter than another one with a shorter run and the same level
3669         if(last_non_zero <= 27){
3670             for(; survivor_count; survivor_count--){
3671                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3672                     break;
3673             }
3674         }else{
3675             for(; survivor_count; survivor_count--){
3676                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3677                     break;
3678             }
3679         }
3680
3681         survivor[ survivor_count++ ]= i+1;
3682     }
3683
3684     if(s->out_format != FMT_H263){
3685         last_score= 256*256*256*120;
3686         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3687             int score= score_tab[i];
3688             if(i) score += lambda*2; //FIXME more exact?
3689
3690             if(score < last_score){
3691                 last_score= score;
3692                 last_i= i;
3693                 last_level= level_tab[i];
3694                 last_run= run_tab[i];
3695             }
3696         }
3697     }
3698
3699     s->coded_score[n] = last_score;
3700
3701     dc= FFABS(block[0]);
3702     last_non_zero= last_i - 1;
3703     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3704
3705     if(last_non_zero < start_i)
3706         return last_non_zero;
3707
3708     if(last_non_zero == 0 && start_i == 0){
3709         int best_level= 0;
3710         int best_score= dc * dc;
3711
3712         for(i=0; i<coeff_count[0]; i++){
3713             int level= coeff[i][0];
3714             int alevel= FFABS(level);
3715             int unquant_coeff, score, distortion;
3716
3717             if(s->out_format == FMT_H263){
3718                     unquant_coeff= (alevel*qmul + qadd)>>3;
3719             }else{ //MPEG1
3720                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3721                     unquant_coeff =   (unquant_coeff - 1) | 1;
3722             }
3723             unquant_coeff = (unquant_coeff + 4) >> 3;
3724             unquant_coeff<<= 3 + 3;
3725
3726             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3727             level+=64;
3728             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3729             else                    score= distortion + esc_length*lambda;
3730
3731             if(score < best_score){
3732                 best_score= score;
3733                 best_level= level - 64;
3734             }
3735         }
3736         block[0]= best_level;
3737         s->coded_score[n] = best_score - dc*dc;
3738         if(best_level == 0) return -1;
3739         else                return last_non_zero;
3740     }
3741
3742     i= last_i;
3743     assert(last_level);
3744
3745     block[ perm_scantable[last_non_zero] ]= last_level;
3746     i -= last_run + 1;
3747
3748     for(; i>start_i; i -= run_tab[i] + 1){
3749         block[ perm_scantable[i-1] ]= level_tab[i];
3750     }
3751
3752     return last_non_zero;
3753 }
3754
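/* basis[k] holds the k-th 8x8 DCT basis function scaled by 2^BASIS_SHIFT and
 * stored through the IDCT permutation, so dct_quantize_refine() can update
 * its pixel-domain error incrementally: changing coefficient k by some delta
 * changes the reconstruction by delta * basis[k]. */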
3755 //#define REFINE_STATS 1
3756 static int16_t basis[64][64];
3757
3758 static void build_basis(uint8_t *perm){
3759     int i, j, x, y;
3760     emms_c();
3761     for(i=0; i<8; i++){
3762         for(j=0; j<8; j++){
3763             for(y=0; y<8; y++){
3764                 for(x=0; x<8; x++){
3765                     double s= 0.25*(1<<BASIS_SHIFT);
3766                     int index= 8*i + j;
3767                     int perm_index= perm[index];
3768                     if(i==0) s*= sqrt(0.5);
3769                     if(j==0) s*= sqrt(0.5);
3770                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3771                 }
3772             }
3773         }
3774     }
3775 }
3776
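/* Second quantization pass (quantizer noise shaping): starting from the
 * output of the regular quantizer, repeatedly try changing one coefficient by
 * +-1, score each change as lambda * (bit count delta) plus the weighted
 * distortion delta from try_8x8basis(), apply the single best change, and
 * stop once no change improves the score. */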
3777 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3778                         int16_t *block, int16_t *weight, int16_t *orig,
3779                         int n, int qscale){
3780     int16_t rem[64];
3781     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3782     const uint8_t *scantable= s->intra_scantable.scantable;
3783     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3784 //    unsigned int threshold1, threshold2;
3785 //    int bias=0;
3786     int run_tab[65];
3787     int prev_run=0;
3788     int prev_level=0;
3789     int qmul, qadd, start_i, last_non_zero, i, dc;
3790     uint8_t * length;
3791     uint8_t * last_length;
3792     int lambda;
3793     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3794 #ifdef REFINE_STATS
3795 static int count=0;
3796 static int after_last=0;
3797 static int to_zero=0;
3798 static int from_zero=0;
3799 static int raise=0;
3800 static int lower=0;
3801 static int messed_sign=0;
3802 #endif
3803
3804     if(basis[0][0] == 0)
3805         build_basis(s->idsp.idct_permutation);
3806
3807     qmul= qscale*2;
3808     qadd= (qscale-1)|1;
3809     if (s->mb_intra) {
3810         if (!s->h263_aic) {
3811             if (n < 4)
3812                 q = s->y_dc_scale;
3813             else
3814                 q = s->c_dc_scale;
3815         } else{
3816             /* For AIC we skip quant/dequant of INTRADC */
3817             q = 1;
3818             qadd=0;
3819         }
3820         q <<= RECON_SHIFT-3;
3821         /* note: block[0] is assumed to be positive */
3822         dc= block[0]*q;
3823 //        block[0] = (block[0] + (q >> 1)) / q;
3824         start_i = 1;
3825 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3826 //            bias= 1<<(QMAT_SHIFT-1);
3827         length     = s->intra_ac_vlc_length;
3828         last_length= s->intra_ac_vlc_last_length;
3829     } else {
3830         dc= 0;
3831         start_i = 0;
3832         length     = s->inter_ac_vlc_length;
3833         last_length= s->inter_ac_vlc_last_length;
3834     }
3835     last_non_zero = s->block_last_index[n];
3836
3837 #ifdef REFINE_STATS
3838 {START_TIMER
3839 #endif
3840     dc += (1<<(RECON_SHIFT-1));
3841     for(i=0; i<64; i++){
3842         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
3843     }
3844 #ifdef REFINE_STATS
3845 STOP_TIMER("memset rem[]")}
3846 #endif
3847     sum=0;
3848     for(i=0; i<64; i++){
3849         int one= 36;
3850         int qns=4;
3851         int w;
3852
3853         w= FFABS(weight[i]) + qns*one;
3854         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3855
3856         weight[i] = w;
3857 //        w=weight[i] = (63*qns + (w/2)) / w;
3858
3859         assert(w>0);
3860         assert(w<(1<<6));
3861         sum += w*w;
3862     }
3863     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3864 #ifdef REFINE_STATS
3865 {START_TIMER
3866 #endif
3867     run=0;
3868     rle_index=0;
3869     for(i=start_i; i<=last_non_zero; i++){
3870         int j= perm_scantable[i];
3871         const int level= block[j];
3872         int coeff;
3873
3874         if(level){
3875             if(level<0) coeff= qmul*level - qadd;
3876             else        coeff= qmul*level + qadd;
3877             run_tab[rle_index++]=run;
3878             run=0;
3879
3880             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
3881         }else{
3882             run++;
3883         }
3884     }
3885 #ifdef REFINE_STATS
3886 if(last_non_zero>0){
3887 STOP_TIMER("init rem[]")
3888 }
3889 }
3890
3891 {START_TIMER
3892 #endif
3893     for(;;){
3894         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
3895         int best_coeff=0;
3896         int best_change=0;
3897         int run2, best_unquant_change=0, analyze_gradient;
3898 #ifdef REFINE_STATS
3899 {START_TIMER
3900 #endif
3901         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3902
3903         if(analyze_gradient){
3904 #ifdef REFINE_STATS
3905 {START_TIMER
3906 #endif
3907             for(i=0; i<64; i++){
3908                 int w= weight[i];
3909
3910                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3911             }
3912 #ifdef REFINE_STATS
3913 STOP_TIMER("rem*w*w")}
3914 {START_TIMER
3915 #endif
3916             s->fdsp.fdct(d1);
3917 #ifdef REFINE_STATS
3918 STOP_TIMER("dct")}
3919 #endif
3920         }
3921
3922         if(start_i){
3923             const int level= block[0];
3924             int change, old_coeff;
3925
3926             assert(s->mb_intra);
3927
3928             old_coeff= q*level;
3929
3930             for(change=-1; change<=1; change+=2){
3931                 int new_level= level + change;
3932                 int score, new_coeff;
3933
3934                 new_coeff= q*new_level;
3935                 if(new_coeff >= 2048 || new_coeff < 0)
3936                     continue;
3937
3938                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
3939                                                   new_coeff - old_coeff);
3940                 if(score<best_score){
3941                     best_score= score;
3942                     best_coeff= 0;
3943                     best_change= change;
3944                     best_unquant_change= new_coeff - old_coeff;
3945                 }
3946             }
3947         }
3948
3949         run=0;
3950         rle_index=0;
3951         run2= run_tab[rle_index++];
3952         prev_level=0;
3953         prev_run=0;
3954
3955         for(i=start_i; i<64; i++){
3956             int j= perm_scantable[i];
3957             const int level= block[j];
3958             int change, old_coeff;
3959
3960             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3961                 break;
3962
3963             if(level){
3964                 if(level<0) old_coeff= qmul*level - qadd;
3965                 else        old_coeff= qmul*level + qadd;
3966                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3967             }else{
3968                 old_coeff=0;
3969                 run2--;
3970                 assert(run2>=0 || i >= last_non_zero );
3971             }
3972
3973             for(change=-1; change<=1; change+=2){
3974                 int new_level= level + change;
3975                 int score, new_coeff, unquant_change;
3976
3977                 score=0;
3978                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3979                    continue;
3980
3981                 if(new_level){
3982                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3983                     else            new_coeff= qmul*new_level + qadd;
3984                     if(new_coeff >= 2048 || new_coeff <= -2048)
3985                         continue;
3986                     //FIXME check for overflow
3987
3988                     if(level){
3989                         if(level < 63 && level > -63){
3990                             if(i < last_non_zero)
3991                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3992                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3993                             else
3994                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3995                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3996                         }
3997                     }else{
3998                         assert(FFABS(new_level)==1);
3999
4000                         if(analyze_gradient){
4001                             int g= d1[ scantable[i] ];
4002                             if(g && (g^new_level) >= 0)
4003                                 continue;
4004                         }
4005
4006                         if(i < last_non_zero){
4007                             int next_i= i + run2 + 1;
4008                             int next_level= block[ perm_scantable[next_i] ] + 64;
4009
4010                             if(next_level&(~127))
4011                                 next_level= 0;
4012
4013                             if(next_i < last_non_zero)
4014                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4015                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4016                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4017                             else
4018                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4019                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4020                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4021                         }else{
4022                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4023                             if(prev_level){
4024                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4025                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4026                             }
4027                         }
4028                     }
4029                 }else{
4030                     new_coeff=0;
4031                     assert(FFABS(level)==1);
4032
4033                     if(i < last_non_zero){
4034                         int next_i= i + run2 + 1;
4035                         int next_level= block[ perm_scantable[next_i] ] + 64;
4036
4037                         if(next_level&(~127))
4038                             next_level= 0;
4039
4040                         if(next_i < last_non_zero)
4041                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4042                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4043                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4044                         else
4045                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4046                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4047                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4048                     }else{
4049                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4050                         if(prev_level){
4051                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4052                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4053                         }
4054                     }
4055                 }
4056
4057                 score *= lambda;
4058
4059                 unquant_change= new_coeff - old_coeff;
4060                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4061
4062                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4063                                                    unquant_change);
4064                 if(score<best_score){
4065                     best_score= score;
4066                     best_coeff= i;
4067                     best_change= change;
4068                     best_unquant_change= unquant_change;
4069                 }
4070             }
4071             if(level){
4072                 prev_level= level + 64;
4073                 if(prev_level&(~127))
4074                     prev_level= 0;
4075                 prev_run= run;
4076                 run=0;
4077             }else{
4078                 run++;
4079             }
4080         }
4081 #ifdef REFINE_STATS
4082 STOP_TIMER("iterative step")}
4083 #endif
4084
4085         if(best_change){
4086             int j= perm_scantable[ best_coeff ];
4087
4088             block[j] += best_change;
4089
4090             if(best_coeff > last_non_zero){
4091                 last_non_zero= best_coeff;
4092                 assert(block[j]);
4093 #ifdef REFINE_STATS
4094 after_last++;
4095 #endif
4096             }else{
4097 #ifdef REFINE_STATS
4098 if(block[j]){
4099     if(block[j] - best_change){
4100         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4101             raise++;
4102         }else{
4103             lower++;
4104         }
4105     }else{
4106         from_zero++;
4107     }
4108 }else{
4109     to_zero++;
4110 }
4111 #endif
4112                 for(; last_non_zero>=start_i; last_non_zero--){
4113                     if(block[perm_scantable[last_non_zero]])
4114                         break;
4115                 }
4116             }
4117 #ifdef REFINE_STATS
4118 count++;
4119 if(256*256*256*64 % count == 0){
4120     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4121 }
4122 #endif
4123             run=0;
4124             rle_index=0;
4125             for(i=start_i; i<=last_non_zero; i++){
4126                 int j= perm_scantable[i];
4127                 const int level= block[j];
4128
4129                 if(level){
4130                     run_tab[rle_index++]=run;
4131                     run=0;
4132                 }else{
4133                     run++;
4134                 }
4135             }
4136
4137             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4138         }else{
4139             break;
4140         }
4141     }
4142 #ifdef REFINE_STATS
4143 if(last_non_zero>0){
4144 STOP_TIMER("iterative search")
4145 }
4146 }
4147 #endif
4148
4149     return last_non_zero;
4150 }
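
/* Illustrative sketch (hypothetical helper, not part of libavcodec): the
 * refinement loop above tries a +/-1 change to one quantized coefficient at
 * a time and keeps the best candidate only while it lowers a combined
 * rate-distortion cost.  The rate term comes from the VLC length tables
 * (length[] / last_length[]), the distortion term from try_8x8basis() on
 * the remaining error, and lambda trades one against the other, roughly: */
static inline int rd_score_sketch(int rate_delta, int distortion_delta,
                                  int lambda)
{
    /* lower is better; mirrors "score *= lambda" followed by adding the
     * try_8x8basis() result in the loop above */
    return lambda * rate_delta + distortion_delta;
}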
4151
4152 int ff_dct_quantize_c(MpegEncContext *s,
4153                         int16_t *block, int n,
4154                         int qscale, int *overflow)
4155 {
4156     int i, j, level, last_non_zero, q, start_i;
4157     const int *qmat;
4158     const uint8_t *scantable= s->intra_scantable.scantable;
4159     int bias;
4160     int max=0;
4161     unsigned int threshold1, threshold2;
4162
4163     s->fdsp.fdct(block);
4164
4165     if(s->dct_error_sum)
4166         s->denoise_dct(s, block);
4167
4168     if (s->mb_intra) {
4169         if (!s->h263_aic) {
4170             if (n < 4)
4171                 q = s->y_dc_scale;
4172             else
4173                 q = s->c_dc_scale;
4174             q = q << 3;
4175         } else
4176             /* For AIC we skip quant/dequant of INTRADC */
4177             q = 1 << 3;
4178
4179         /* note: block[0] is assumed to be positive */
4180         block[0] = (block[0] + (q >> 1)) / q;
4181         start_i = 1;
4182         last_non_zero = 0;
4183         qmat = s->q_intra_matrix[qscale];
4184         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4185     } else {
4186         start_i = 0;
4187         last_non_zero = -1;
4188         qmat = s->q_inter_matrix[qscale];
4189         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4190     }
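    /* threshold1/threshold2 implement the quantizer dead zone: a scaled
     * coefficient level = block[j] * qmat[j] maps to zero whenever
     * |level| <= threshold1.  The comparison
     * (unsigned)(level + threshold1) > threshold2 used below is a
     * branch-free test for |level| > threshold1; the first loop uses it
     * to scan backwards from index 63 for the last coefficient that
     * survives quantization. */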
4191     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4192     threshold2= (threshold1<<1);
4193     for(i=63;i>=start_i;i--) {
4194         j = scantable[i];
4195         level = block[j] * qmat[j];
4196
4197         if(((unsigned)(level+threshold1))>threshold2){
4198             last_non_zero = i;
4199             break;
4200         }else{
4201             block[j]=0;
4202         }
4203     }
4204     for(i=start_i; i<=last_non_zero; i++) {
4205         j = scantable[i];
4206         level = block[j] * qmat[j];
4207
4208 //        if(   bias+level >= (1<<QMAT_SHIFT)
4209 //           || bias-level >= (1<<QMAT_SHIFT)){
4210         if(((unsigned)(level+threshold1))>threshold2){
4211             if(level>0){
4212                 level= (bias + level)>>QMAT_SHIFT;
4213                 block[j]= level;
4214             }else{
4215                 level= (bias - level)>>QMAT_SHIFT;
4216                 block[j]= -level;
4217             }
4218             max |=level;
4219         }else{
4220             block[j]=0;
4221         }
4222     }
4223     *overflow= s->max_qcoeff < max; //overflow might have happened
4224
4225     /* We need this permutation so that the coefficients match the IDCT's expected order; only the nonzero elements (up to last_non_zero) are permuted. */
4226     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4227         ff_block_permute(block, s->idsp.idct_permutation,
4228                          scantable, last_non_zero);
4229
4230     return last_non_zero;
4231 }
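
/* Illustrative sketch (hypothetical helper, not part of libavcodec): per
 * coefficient, the loops in ff_dct_quantize_c() above reduce to a dead-zone
 * quantizer of the following form, with the sign split off so the rounding
 * bias is always applied to the magnitude. */
static int quantize_coeff_sketch(int coeff, int qmat_entry, int bias)
{
    int level      = coeff * qmat_entry;
    int threshold1 = (1 << QMAT_SHIFT) - bias - 1;

    if (level > threshold1)
        return  (bias + level) >> QMAT_SHIFT;
    if (level < -threshold1)
        return -((bias - level) >> QMAT_SHIFT);
    return 0; /* inside the dead zone */
}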
4232
4233 #define OFFSET(x) offsetof(MpegEncContext, x)
4234 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4235 static const AVOption h263_options[] = {
4236     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4237     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4238     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4239     FF_MPV_COMMON_OPTS
4240     { NULL },
4241 };
4242
4243 static const AVClass h263_class = {
4244     .class_name = "H.263 encoder",
4245     .item_name  = av_default_item_name,
4246     .option     = h263_options,
4247     .version    = LIBAVUTIL_VERSION_INT,
4248 };
4249
4250 AVCodec ff_h263_encoder = {
4251     .name           = "h263",
4252     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4253     .type           = AVMEDIA_TYPE_VIDEO,
4254     .id             = AV_CODEC_ID_H263,
4255     .priv_data_size = sizeof(MpegEncContext),
4256     .init           = ff_MPV_encode_init,
4257     .encode2        = ff_MPV_encode_picture,
4258     .close          = ff_MPV_encode_end,
4259     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4260     .priv_class     = &h263_class,
4261 };
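
/* Application-side sketch (hypothetical, not part of this file): the private
 * options declared in h263_options[] above live in the encoder's priv_data
 * and can be set through the AVOptions API before avcodec_open2(), e.g.: */
#if 0
static AVCodecContext *open_h263_obmc_encoder(void)
{
    AVCodec        *codec = avcodec_find_encoder_by_name("h263");
    AVCodecContext *avctx = avcodec_alloc_context3(codec);

    avctx->width     = 352;
    avctx->height    = 288;
    avctx->time_base = (AVRational){ 1, 25 };
    avctx->pix_fmt   = AV_PIX_FMT_YUV420P;

    av_opt_set_int(avctx->priv_data, "obmc", 1, 0); /* overlapped block MC */

    if (avcodec_open2(avctx, codec, NULL) < 0)
        return NULL; /* cleanup omitted for brevity */
    return avctx;
}
#endif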
4262
4263 static const AVOption h263p_options[] = {
4264     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4265     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4266     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4267     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4268     FF_MPV_COMMON_OPTS
4269     { NULL },
4270 };
4271 static const AVClass h263p_class = {
4272     .class_name = "H.263p encoder",
4273     .item_name  = av_default_item_name,
4274     .option     = h263p_options,
4275     .version    = LIBAVUTIL_VERSION_INT,
4276 };
4277
4278 AVCodec ff_h263p_encoder = {
4279     .name           = "h263p",
4280     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4281     .type           = AVMEDIA_TYPE_VIDEO,
4282     .id             = AV_CODEC_ID_H263P,
4283     .priv_data_size = sizeof(MpegEncContext),
4284     .init           = ff_MPV_encode_init,
4285     .encode2        = ff_MPV_encode_picture,
4286     .close          = ff_MPV_encode_end,
4287     .capabilities   = CODEC_CAP_SLICE_THREADS,
4288     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4289     .priv_class     = &h263p_class,
4290 };
4291
4292 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4293
4294 AVCodec ff_msmpeg4v2_encoder = {
4295     .name           = "msmpeg4v2",
4296     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4297     .type           = AVMEDIA_TYPE_VIDEO,
4298     .id             = AV_CODEC_ID_MSMPEG4V2,
4299     .priv_data_size = sizeof(MpegEncContext),
4300     .init           = ff_MPV_encode_init,
4301     .encode2        = ff_MPV_encode_picture,
4302     .close          = ff_MPV_encode_end,
4303     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4304     .priv_class     = &msmpeg4v2_class,
4305 };
4306
4307 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4308
4309 AVCodec ff_msmpeg4v3_encoder = {
4310     .name           = "msmpeg4",
4311     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4312     .type           = AVMEDIA_TYPE_VIDEO,
4313     .id             = AV_CODEC_ID_MSMPEG4V3,
4314     .priv_data_size = sizeof(MpegEncContext),
4315     .init           = ff_MPV_encode_init,
4316     .encode2        = ff_MPV_encode_picture,
4317     .close          = ff_MPV_encode_end,
4318     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4319     .priv_class     = &msmpeg4v3_class,
4320 };
4321
4322 FF_MPV_GENERIC_CLASS(wmv1)
4323
4324 AVCodec ff_wmv1_encoder = {
4325     .name           = "wmv1",
4326     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4327     .type           = AVMEDIA_TYPE_VIDEO,
4328     .id             = AV_CODEC_ID_WMV1,
4329     .priv_data_size = sizeof(MpegEncContext),
4330     .init           = ff_MPV_encode_init,
4331     .encode2        = ff_MPV_encode_picture,
4332     .close          = ff_MPV_encode_end,
4333     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4334     .priv_class     = &wmv1_class,
4335 };