git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of Libav.
   9  *
  10  * Libav is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * Libav is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with Libav; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "dsputil.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "h261.h"
  44 #include "h263.h"
  45 #include "mathops.h"
  46 #include "mpegutils.h"
  47 #include "mjpegenc.h"
  48 #include "msmpeg4.h"
  49 #include "qpeldsp.h"
  50 #include "faandct.h"
  51 #include "thread.h"
  52 #include "aandcttab.h"
  53 #include "flv.h"
  54 #include "mpeg4video.h"
  55 #include "internal.h"
  56 #include "bytestream.h"
  57 #include <limits.h>
  58
  59 static int encode_picture(MpegEncContext *s, int picture_number);
  60 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
  61 static int sse_mb(MpegEncContext *s);
  62 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
  63 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
  64
  65 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
  66 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
  67
  68 const AVOption ff_mpv_generic_options[] = {
  69     FF_MPV_COMMON_OPTS
  70     { NULL },
  71 };
  72
  73 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
  74                        uint16_t (*qmat16)[2][64],
  75                        const uint16_t *quant_matrix,
  76                        int bias, int qmin, int qmax, int intra)
  77 {
  78     DSPContext *dsp = &s->dsp;
  79     int qscale;
  80     int shift = 0;
  81
  82     for (qscale = qmin; qscale <= qmax; qscale++) {
  83         int i;
  84         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
  85             dsp->fdct == ff_jpeg_fdct_islow_10 ||
  86             dsp->fdct == ff_faandct) {
  87             for (i = 0; i < 64; i++) {
  88                 const int j = dsp->idct_permutation[i];
  89                 /* 16 <= qscale * quant_matrix[i] <= 7905
  90                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
  91                  *             19952 <=              x  <= 249205026
  92                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
  93                  *           3444240 >= (1 << 36) / (x) >= 275 */
  94
  95                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
  96                                         (qscale * quant_matrix[j]));
  97             }
  98         } else if (dsp->fdct == ff_fdct_ifast) {
  99             for (i = 0; i < 64; i++) {
 100                 const int j = dsp->idct_permutation[i];
 101                 /* 16 <= qscale * quant_matrix[i] <= 7905
 102                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 103                  *             19952 <=              x  <= 249205026
 104                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 105                  *           3444240 >= (1 << 36) / (x) >= 275 */
 106
 107                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
 108                                         (ff_aanscales[i] * qscale *
 109                                          quant_matrix[j]));
 110             }
 111         } else {
 112             for (i = 0; i < 64; i++) {
 113                 const int j = dsp->idct_permutation[i];
 114                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 115                  * Assume x = qscale * quant_matrix[i]
 116                  * So             16 <=              x  <= 7905
 117                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 118                  * so          32768 >= (1 << 19) / (x) >= 67 */
 119                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
 120                                         (qscale * quant_matrix[j]));
 121                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 122                 //                    (qscale * quant_matrix[i]);
 123                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
 124                                        (qscale * quant_matrix[j]);
 125
 126                 if (qmat16[qscale][0][i] == 0 ||
 127                     qmat16[qscale][0][i] == 128 * 256)
 128                     qmat16[qscale][0][i] = 128 * 256 - 1;
 129                 qmat16[qscale][1][i] =
 130                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 131                                 qmat16[qscale][0][i]);
 132             }
 133         }
 134
 135         for (i = intra; i < 64; i++) {
 136             int64_t max = 8191;
 137             if (dsp->fdct == ff_fdct_ifast) {
 138                 max = (8191LL * ff_aanscales[i]) >> 14;
 139             }
 140             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 141                 shift++;
 142             }
 143         }
 144     }
 145     if (shift) {
 146         av_log(NULL, AV_LOG_INFO,
 147                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 148                QMAT_SHIFT - shift);
 149     }
 150 }
 151
 152 static inline void update_qscale(MpegEncContext *s)
 153 {
 154     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 155                 (FF_LAMBDA_SHIFT + 7);
 156     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 157
 158     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 159                  FF_LAMBDA_SHIFT;
 160 }
 161
 162 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 163 {
 164     int i;
 165
 166     if (matrix) {
 167         put_bits(pb, 1, 1);
 168         for (i = 0; i < 64; i++) {
 169             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 170         }
 171     } else
 172         put_bits(pb, 1, 0);
 173 }
 174
 175 /**
 176  * init s->current_picture.qscale_table from s->lambda_table
 177  */
 178 void ff_init_qscale_tab(MpegEncContext *s)
 179 {
 180     int8_t * const qscale_table = s->current_picture.qscale_table;
 181     int i;
 182
 183     for (i = 0; i < s->mb_num; i++) {
 184         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 185         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 186         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 187                                                   s->avctx->qmax);
 188     }
 189 }
 190
 191 static void update_duplicate_context_after_me(MpegEncContext *dst,
 192                                               MpegEncContext *src)
 193 {
 194 #define COPY(a) dst->a= src->a
 195     COPY(pict_type);
 196     COPY(current_picture);
 197     COPY(f_code);
 198     COPY(b_code);
 199     COPY(qscale);
 200     COPY(lambda);
 201     COPY(lambda2);
 202     COPY(picture_in_gop_number);
 203     COPY(gop_picture_number);
 204     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 205     COPY(progressive_frame);    // FIXME don't set in encode_header
 206     COPY(partitioned_frame);    // FIXME don't set in encode_header
 207 #undef COPY
 208 }
 209
 210 /**
 211  * Set the given MpegEncContext to defaults for encoding.
 212  * the changed fields will not depend upon the prior state of the MpegEncContext.
 213  */
 214 static void MPV_encode_defaults(MpegEncContext *s)
 215 {
 216     int i;
 217     ff_MPV_common_defaults(s);
 218
 219     for (i = -16; i < 16; i++) {
 220         default_fcode_tab[i + MAX_MV] = 1;
 221     }
 222     s->me.mv_penalty = default_mv_penalty;
 223     s->fcode_tab     = default_fcode_tab;
 224
 225     s->input_picture_number  = 0;
 226     s->picture_in_gop_number = 0;
 227 }
 228
 229 /* init video encoder */
 230 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
 231 {
 232     MpegEncContext *s = avctx->priv_data;
 233     int i, ret, format_supported;
 234
 235     MPV_encode_defaults(s);
 236
 237     switch (avctx->codec_id) {
 238     case AV_CODEC_ID_MPEG2VIDEO:
 239         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 240             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 241             av_log(avctx, AV_LOG_ERROR,
 242                    "only YUV420 and YUV422 are supported\n");
 243             return -1;
 244         }
 245         break;
 246     case AV_CODEC_ID_MJPEG:
 247         format_supported = 0;
 248         /* JPEG color space */
 249         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
 250             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
 251             (avctx->color_range == AVCOL_RANGE_JPEG &&
 252              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 253               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
 254             format_supported = 1;
 255         /* MPEG color space */
 256         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
 257                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 258                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
 259             format_supported = 1;
 260
 261         if (!format_supported) {
 262             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 263             return -1;
 264         }
 265         break;
 266     default:
 267         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 268             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 269             return -1;
 270         }
 271     }
 272
 273     switch (avctx->pix_fmt) {
 274     case AV_PIX_FMT_YUVJ422P:
 275     case AV_PIX_FMT_YUV422P:
 276         s->chroma_format = CHROMA_422;
 277         break;
 278     case AV_PIX_FMT_YUVJ420P:
 279     case AV_PIX_FMT_YUV420P:
 280     default:
 281         s->chroma_format = CHROMA_420;
 282         break;
 283     }
 284
 285     s->bit_rate = avctx->bit_rate;
 286     s->width    = avctx->width;
 287     s->height   = avctx->height;
 288     if (avctx->gop_size > 600 &&
 289         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 290         av_log(avctx, AV_LOG_ERROR,
 291                "Warning keyframe interval too large! reducing it ...\n");
 292         avctx->gop_size = 600;
 293     }
 294     s->gop_size     = avctx->gop_size;
 295     s->avctx        = avctx;
 296     s->flags        = avctx->flags;
 297     s->flags2       = avctx->flags2;
 298     if (avctx->max_b_frames > MAX_B_FRAMES) {
 299         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 300                "is %d.\n", MAX_B_FRAMES);
 301     }
 302     s->max_b_frames = avctx->max_b_frames;
 303     s->codec_id     = avctx->codec->id;
 304     s->strict_std_compliance = avctx->strict_std_compliance;
 305     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 306     s->mpeg_quant         = avctx->mpeg_quant;
 307     s->rtp_mode           = !!avctx->rtp_payload_size;
 308     s->intra_dc_precision = avctx->intra_dc_precision;
 309     s->user_specified_pts = AV_NOPTS_VALUE;
 310
 311     if (s->gop_size <= 1) {
 312         s->intra_only = 1;
 313         s->gop_size   = 12;
 314     } else {
 315         s->intra_only = 0;
 316     }
 317
 318     s->me_method = avctx->me_method;
 319
 320     /* Fixed QSCALE */
 321     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 322
 323     s->adaptive_quant = (s->avctx->lumi_masking ||
 324                          s->avctx->dark_masking ||
 325                          s->avctx->temporal_cplx_masking ||
 326                          s->avctx->spatial_cplx_masking  ||
 327                          s->avctx->p_masking      ||
 328                          s->avctx->border_masking ||
 329                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 330                         !s->fixed_qscale;
 331
 332     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
 333
 334     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 335         av_log(avctx, AV_LOG_ERROR,
 336                "a vbv buffer size is needed, "
 337                "for encoding with a maximum bitrate\n");
 338         return -1;
 339     }
 340
 341     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 342         av_log(avctx, AV_LOG_INFO,
 343                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 344     }
 345
 346     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 347         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 348         return -1;
 349     }
 350
 351     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 352         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
 353         return -1;
 354     }
 355
 356     if (avctx->rc_max_rate &&
 357         avctx->rc_max_rate == avctx->bit_rate &&
 358         avctx->rc_max_rate != avctx->rc_min_rate) {
 359         av_log(avctx, AV_LOG_INFO,
 360                "impossible bitrate constraints, this will fail\n");
 361     }
 362
 363     if (avctx->rc_buffer_size &&
 364         avctx->bit_rate * (int64_t)avctx->time_base.num >
 365             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 366         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 367         return -1;
 368     }
 369
 370     if (!s->fixed_qscale &&
 371         avctx->bit_rate * av_q2d(avctx->time_base) >
 372             avctx->bit_rate_tolerance) {
 373         av_log(avctx, AV_LOG_ERROR,
 374                "bitrate tolerance too small for bitrate\n");
 375         return -1;
 376     }
 377
 378     if (s->avctx->rc_max_rate &&
 379         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 380         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 381          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 382         90000LL * (avctx->rc_buffer_size - 1) >
 383             s->avctx->rc_max_rate * 0xFFFFLL) {
 384         av_log(avctx, AV_LOG_INFO,
 385                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 386                "specified vbv buffer is too large for the given bitrate!\n");
 387     }
 388
 389     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
 390         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 391         s->codec_id != AV_CODEC_ID_FLV1) {
 392         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 393         return -1;
 394     }
 395
 396     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 397         av_log(avctx, AV_LOG_ERROR,
 398                "OBMC is only supported with simple mb decision\n");
 399         return -1;
 400     }
 401
 402     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 403         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 404         return -1;
 405     }
 406
 407     if (s->max_b_frames                    &&
 408         s->codec_id != AV_CODEC_ID_MPEG4      &&
 409         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 410         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 411         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 412         return -1;
 413     }
 414
 415     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 416          s->codec_id == AV_CODEC_ID_H263  ||
 417          s->codec_id == AV_CODEC_ID_H263P) &&
 418         (avctx->sample_aspect_ratio.num > 255 ||
 419          avctx->sample_aspect_ratio.den > 255)) {
 420         av_log(avctx, AV_LOG_ERROR,
 421                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
 422                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 423         return -1;
 424     }
 425
 426     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 427         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 428         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 429         return -1;
 430     }
 431
 432     // FIXME mpeg2 uses that too
 433     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
 434         av_log(avctx, AV_LOG_ERROR,
 435                "mpeg2 style quantization not supported by codec\n");
 436         return -1;
 437     }
 438
 439     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 440         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 441         return -1;
 442     }
 443
 444     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 445         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 446         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 447         return -1;
 448     }
 449
 450     if (s->avctx->scenechange_threshold < 1000000000 &&
 451         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
 452         av_log(avctx, AV_LOG_ERROR,
 453                "closed gop with scene change detection are not supported yet, "
 454                "set threshold to 1000000000\n");
 455         return -1;
 456     }
 457
 458     if (s->flags & CODEC_FLAG_LOW_DELAY) {
 459         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 460             av_log(avctx, AV_LOG_ERROR,
 461                   "low delay forcing is only available for mpeg2\n");
 462             return -1;
 463         }
 464         if (s->max_b_frames != 0) {
 465             av_log(avctx, AV_LOG_ERROR,
 466                    "b frames cannot be used with low delay\n");
 467             return -1;
 468         }
 469     }
 470
 471     if (s->q_scale_type == 1) {
 472         if (avctx->qmax > 12) {
 473             av_log(avctx, AV_LOG_ERROR,
 474                    "non linear quant only supports qmax <= 12 currently\n");
 475             return -1;
 476         }
 477     }
 478
 479     if (s->avctx->thread_count > 1         &&
 480         s->codec_id != AV_CODEC_ID_MPEG4      &&
 481         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 482         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 483         (s->codec_id != AV_CODEC_ID_H263P)) {
 484         av_log(avctx, AV_LOG_ERROR,
 485                "multi threaded encoding not supported by codec\n");
 486         return -1;
 487     }
 488
 489     if (s->avctx->thread_count < 1) {
 490         av_log(avctx, AV_LOG_ERROR,
 491                "automatic thread number detection not supported by codec,"
 492                "patch welcome\n");
 493         return -1;
 494     }
 495
 496     if (s->avctx->thread_count > 1)
 497         s->rtp_mode = 1;
 498
 499     if (!avctx->time_base.den || !avctx->time_base.num) {
 500         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 501         return -1;
 502     }
 503
 504     i = (INT_MAX / 2 + 128) >> 8;
 505     if (avctx->mb_threshold >= i) {
 506         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
 507                i - 1);
 508         return -1;
 509     }
 510
 511     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 512         av_log(avctx, AV_LOG_INFO,
 513                "notice: b_frame_strategy only affects the first pass\n");
 514         avctx->b_frame_strategy = 0;
 515     }
 516
 517     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 518     if (i > 1) {
 519         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 520         avctx->time_base.den /= i;
 521         avctx->time_base.num /= i;
 522         //return -1;
 523     }
 524
 525     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 526         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
 527         // (a + x * 3 / 8) / x
 528         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 529         s->inter_quant_bias = 0;
 530     } else {
 531         s->intra_quant_bias = 0;
 532         // (a - x / 4) / x
 533         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 534     }
 535
 536     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 537         s->intra_quant_bias = avctx->intra_quant_bias;
 538     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 539         s->inter_quant_bias = avctx->inter_quant_bias;
 540
 541     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 542         s->avctx->time_base.den > (1 << 16) - 1) {
 543         av_log(avctx, AV_LOG_ERROR,
 544                "timebase %d/%d not supported by MPEG 4 standard, "
 545                "the maximum admitted value for the timebase denominator "
 546                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 547                (1 << 16) - 1);
 548         return -1;
 549     }
 550     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 551
 552     switch (avctx->codec->id) {
 553     case AV_CODEC_ID_MPEG1VIDEO:
 554         s->out_format = FMT_MPEG1;
 555         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 556         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 557         break;
 558     case AV_CODEC_ID_MPEG2VIDEO:
 559         s->out_format = FMT_MPEG1;
 560         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 561         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 562         s->rtp_mode   = 1;
 563         break;
 564     case AV_CODEC_ID_MJPEG:
 565         s->out_format = FMT_MJPEG;
 566         s->intra_only = 1; /* force intra only for jpeg */
 567         if (!CONFIG_MJPEG_ENCODER ||
 568             ff_mjpeg_encode_init(s) < 0)
 569             return -1;
 570         avctx->delay = 0;
 571         s->low_delay = 1;
 572         break;
 573     case AV_CODEC_ID_H261:
 574         if (!CONFIG_H261_ENCODER)
 575             return -1;
 576         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 577             av_log(avctx, AV_LOG_ERROR,
 578                    "The specified picture size of %dx%d is not valid for the "
 579                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 580                     s->width, s->height);
 581             return -1;
 582         }
 583         s->out_format = FMT_H261;
 584         avctx->delay  = 0;
 585         s->low_delay  = 1;
 586         break;
 587     case AV_CODEC_ID_H263:
 588         if (!CONFIG_H263_ENCODER)
 589         return -1;
 590         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 591                              s->width, s->height) == 8) {
 592             av_log(avctx, AV_LOG_INFO,
 593                    "The specified picture size of %dx%d is not valid for "
 594                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 595                    "352x288, 704x576, and 1408x1152."
 596                    "Try H.263+.\n", s->width, s->height);
 597             return -1;
 598         }
 599         s->out_format = FMT_H263;
 600         avctx->delay  = 0;
 601         s->low_delay  = 1;
 602         break;
 603     case AV_CODEC_ID_H263P:
 604         s->out_format = FMT_H263;
 605         s->h263_plus  = 1;
 606         /* Fx */
 607         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 608         s->modified_quant  = s->h263_aic;
 609         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 610         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 611
 612         /* /Fx */
 613         /* These are just to be sure */
 614         avctx->delay = 0;
 615         s->low_delay = 1;
 616         break;
 617     case AV_CODEC_ID_FLV1:
 618         s->out_format      = FMT_H263;
 619         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 620         s->unrestricted_mv = 1;
 621         s->rtp_mode  = 0; /* don't allow GOB */
 622         avctx->delay = 0;
 623         s->low_delay = 1;
 624         break;
 625     case AV_CODEC_ID_RV10:
 626         s->out_format = FMT_H263;
 627         avctx->delay  = 0;
 628         s->low_delay  = 1;
 629         break;
 630     case AV_CODEC_ID_RV20:
 631         s->out_format      = FMT_H263;
 632         avctx->delay       = 0;
 633         s->low_delay       = 1;
 634         s->modified_quant  = 1;
 635         s->h263_aic        = 1;
 636         s->h263_plus       = 1;
 637         s->loop_filter     = 1;
 638         s->unrestricted_mv = 0;
 639         break;
 640     case AV_CODEC_ID_MPEG4:
 641         s->out_format      = FMT_H263;
 642         s->h263_pred       = 1;
 643         s->unrestricted_mv = 1;
 644         s->low_delay       = s->max_b_frames ? 0 : 1;
 645         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 646         break;
 647     case AV_CODEC_ID_MSMPEG4V2:
 648         s->out_format      = FMT_H263;
 649         s->h263_pred       = 1;
 650         s->unrestricted_mv = 1;
 651         s->msmpeg4_version = 2;
 652         avctx->delay       = 0;
 653         s->low_delay       = 1;
 654         break;
 655     case AV_CODEC_ID_MSMPEG4V3:
 656         s->out_format        = FMT_H263;
 657         s->h263_pred         = 1;
 658         s->unrestricted_mv   = 1;
 659         s->msmpeg4_version   = 3;
 660         s->flipflop_rounding = 1;
 661         avctx->delay         = 0;
 662         s->low_delay         = 1;
 663         break;
 664     case AV_CODEC_ID_WMV1:
 665         s->out_format        = FMT_H263;
 666         s->h263_pred         = 1;
 667         s->unrestricted_mv   = 1;
 668         s->msmpeg4_version   = 4;
 669         s->flipflop_rounding = 1;
 670         avctx->delay         = 0;
 671         s->low_delay         = 1;
 672         break;
 673     case AV_CODEC_ID_WMV2:
 674         s->out_format        = FMT_H263;
 675         s->h263_pred         = 1;
 676         s->unrestricted_mv   = 1;
 677         s->msmpeg4_version   = 5;
 678         s->flipflop_rounding = 1;
 679         avctx->delay         = 0;
 680         s->low_delay         = 1;
 681         break;
 682     default:
 683         return -1;
 684     }
 685
 686     avctx->has_b_frames = !s->low_delay;
 687
 688     s->encoding = 1;
 689
 690     s->progressive_frame    =
 691     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 692                                                 CODEC_FLAG_INTERLACED_ME) ||
 693                                 s->alternate_scan);
 694
 695     /* init */
 696     if (ff_MPV_common_init(s) < 0)
 697         return -1;
 698
 699     if (ARCH_X86)
 700         ff_MPV_encode_init_x86(s);
 701
 702     ff_qpeldsp_init(&s->qdsp);
 703
 704     s->avctx->coded_frame = s->current_picture.f;
 705
 706     if (s->msmpeg4_version) {
 707         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 708                           2 * 2 * (MAX_LEVEL + 1) *
 709                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 710     }
 711     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 712
 713     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 714     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 715     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 716     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 717     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 718                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 719     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 720                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 721
 722     if (s->avctx->noise_reduction) {
 723         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 724                           2 * 64 * sizeof(uint16_t), fail);
 725     }
 726
 727     if (CONFIG_H263_ENCODER)
 728         ff_h263dsp_init(&s->h263dsp);
 729     if (!s->dct_quantize)
 730         s->dct_quantize = ff_dct_quantize_c;
 731     if (!s->denoise_dct)
 732         s->denoise_dct  = denoise_dct_c;
 733     s->fast_dct_quantize = s->dct_quantize;
 734     if (avctx->trellis)
 735         s->dct_quantize  = dct_quantize_trellis_c;
 736
 737     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 738         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 739
 740     s->quant_precision = 5;
 741
 742     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
 743     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
 744
 745     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 746         ff_h261_encode_init(s);
 747     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 748         ff_h263_encode_init(s);
 749     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 750         ff_msmpeg4_encode_init(s);
 751     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 752         && s->out_format == FMT_MPEG1)
 753         ff_mpeg1_encode_init(s);
 754
 755     /* init q matrix */
 756     for (i = 0; i < 64; i++) {
 757         int j = s->dsp.idct_permutation[i];
 758         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 759             s->mpeg_quant) {
 760             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 761             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 762         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 763             s->intra_matrix[j] =
 764             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 765         } else {
 766             /* mpeg1/2 */
 767             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 768             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 769         }
 770         if (s->avctx->intra_matrix)
 771             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 772         if (s->avctx->inter_matrix)
 773             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 774     }
 775
 776     /* precompute matrix */
 777     /* for mjpeg, we do include qscale in the matrix */
 778     if (s->out_format != FMT_MJPEG) {
 779         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
 780                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 781                           31, 1);
 782         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
 783                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 784                           31, 0);
 785     }
 786
 787     if (ff_rate_control_init(s) < 0)
 788         return -1;
 789
 790 #if FF_API_ERROR_RATE
 791     FF_DISABLE_DEPRECATION_WARNINGS
 792     if (avctx->error_rate)
 793         s->error_rate = avctx->error_rate;
 794     FF_ENABLE_DEPRECATION_WARNINGS;
 795 #endif
 796
 797 #if FF_API_NORMALIZE_AQP
 798     FF_DISABLE_DEPRECATION_WARNINGS
 799     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 800         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 801     FF_ENABLE_DEPRECATION_WARNINGS;
 802 #endif
 803
 804 #if FF_API_MV0
 805     FF_DISABLE_DEPRECATION_WARNINGS
 806     if (avctx->flags & CODEC_FLAG_MV0)
 807         s->mpv_flags |= FF_MPV_FLAG_MV0;
 808     FF_ENABLE_DEPRECATION_WARNINGS
 809 #endif
 810
 811     if (avctx->b_frame_strategy == 2) {
 812         for (i = 0; i < s->max_b_frames + 2; i++) {
 813             s->tmp_frames[i] = av_frame_alloc();
 814             if (!s->tmp_frames[i])
 815                 return AVERROR(ENOMEM);
 816
 817             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 818             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
 819             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
 820
 821             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 822             if (ret < 0)
 823                 return ret;
 824         }
 825     }
 826
 827     return 0;
 828 fail:
 829     ff_MPV_encode_end(avctx);
 830     return AVERROR_UNKNOWN;
 831 }
 832
 833 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
 834 {
 835     MpegEncContext *s = avctx->priv_data;
 836     int i;
 837
 838     ff_rate_control_uninit(s);
 839
 840     ff_MPV_common_end(s);
 841     if (CONFIG_MJPEG_ENCODER &&
 842         s->out_format == FMT_MJPEG)
 843         ff_mjpeg_encode_close(s);
 844
 845     av_freep(&avctx->extradata);
 846
 847     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
 848         av_frame_free(&s->tmp_frames[i]);
 849
 850     ff_free_picture_tables(&s->new_picture);
 851     ff_mpeg_unref_picture(s, &s->new_picture);
 852
 853     av_freep(&s->avctx->stats_out);
 854     av_freep(&s->ac_stats);
 855
 856     av_freep(&s->q_intra_matrix);
 857     av_freep(&s->q_inter_matrix);
 858     av_freep(&s->q_intra_matrix16);
 859     av_freep(&s->q_inter_matrix16);
 860     av_freep(&s->input_picture);
 861     av_freep(&s->reordered_input_picture);
 862     av_freep(&s->dct_offset);
 863
 864     return 0;
 865 }
 866
 867 static int get_sae(uint8_t *src, int ref, int stride)
 868 {
 869     int x,y;
 870     int acc = 0;
 871
 872     for (y = 0; y < 16; y++) {
 873         for (x = 0; x < 16; x++) {
 874             acc += FFABS(src[x + y * stride] - ref);
 875         }
 876     }
 877
 878     return acc;
 879 }
 880
 881 static int get_intra_count(MpegEncContext *s, uint8_t *src,
 882                            uint8_t *ref, int stride)
 883 {
 884     int x, y, w, h;
 885     int acc = 0;
 886
 887     w = s->width  & ~15;
 888     h = s->height & ~15;
 889
 890     for (y = 0; y < h; y += 16) {
 891         for (x = 0; x < w; x += 16) {
 892             int offset = x + y * stride;
 893             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
 894                                      16);
 895             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
 896             int sae  = get_sae(src + offset, mean, stride);
 897
 898             acc += sae + 500 < sad;
 899         }
 900     }
 901     return acc;
 902 }
 903
 904
 905 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
 906 {
 907     Picture *pic = NULL;
 908     int64_t pts;
 909     int i, display_picture_number = 0, ret;
 910     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
 911                                                  (s->low_delay ? 0 : 1);
 912     int direct = 1;
 913
 914     if (pic_arg) {
 915         pts = pic_arg->pts;
 916         display_picture_number = s->input_picture_number++;
 917
 918         if (pts != AV_NOPTS_VALUE) {
 919             if (s->user_specified_pts != AV_NOPTS_VALUE) {
 920                 int64_t time = pts;
 921                 int64_t last = s->user_specified_pts;
 922
 923                 if (time <= last) {
 924                     av_log(s->avctx, AV_LOG_ERROR,
 925                            "Error, Invalid timestamp=%"PRId64", "
 926                            "last=%"PRId64"\n", pts, s->user_specified_pts);
 927                     return -1;
 928                 }
 929
 930                 if (!s->low_delay && display_picture_number == 1)
 931                     s->dts_delta = time - last;
 932             }
 933             s->user_specified_pts = pts;
 934         } else {
 935             if (s->user_specified_pts != AV_NOPTS_VALUE) {
 936                 s->user_specified_pts =
 937                 pts = s->user_specified_pts + 1;
 938                 av_log(s->avctx, AV_LOG_INFO,
 939                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
 940                        pts);
 941             } else {
 942                 pts = display_picture_number;
 943             }
 944         }
 945     }
 946
 947     if (pic_arg) {
 948         if (!pic_arg->buf[0]);
 949             direct = 0;
 950         if (pic_arg->linesize[0] != s->linesize)
 951             direct = 0;
 952         if (pic_arg->linesize[1] != s->uvlinesize)
 953             direct = 0;
 954         if (pic_arg->linesize[2] != s->uvlinesize)
 955             direct = 0;
 956
 957         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
 958                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
 959
 960         if (direct) {
 961             i = ff_find_unused_picture(s, 1);
 962             if (i < 0)
 963                 return i;
 964
 965             pic = &s->picture[i];
 966             pic->reference = 3;
 967
 968             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
 969                 return ret;
 970             if (ff_alloc_picture(s, pic, 1) < 0) {
 971                 return -1;
 972             }
 973         } else {
 974             i = ff_find_unused_picture(s, 0);
 975             if (i < 0)
 976                 return i;
 977
 978             pic = &s->picture[i];
 979             pic->reference = 3;
 980
 981             if (ff_alloc_picture(s, pic, 0) < 0) {
 982                 return -1;
 983             }
 984
 985             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
 986                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
 987                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
 988                 // empty
 989             } else {
 990                 int h_chroma_shift, v_chroma_shift;
 991                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
 992                                                  &h_chroma_shift,
 993                                                  &v_chroma_shift);
 994
 995                 for (i = 0; i < 3; i++) {
 996                     int src_stride = pic_arg->linesize[i];
 997                     int dst_stride = i ? s->uvlinesize : s->linesize;
 998                     int h_shift = i ? h_chroma_shift : 0;
 999                     int v_shift = i ? v_chroma_shift : 0;
1000                     int w = s->width  >> h_shift;
1001                     int h = s->height >> v_shift;
1002                     uint8_t *src = pic_arg->data[i];
1003                     uint8_t *dst = pic->f->data[i];
1004
1005                     if (!s->avctx->rc_buffer_size)
1006                         dst += INPLACE_OFFSET;
1007
1008                     if (src_stride == dst_stride)
1009                         memcpy(dst, src, src_stride * h);
1010                     else {
1011                         while (h--) {
1012                             memcpy(dst, src, w);
1013                             dst += dst_stride;
1014                             src += src_stride;
1015                         }
1016                     }
1017                 }
1018             }
1019         }
1020         ret = av_frame_copy_props(pic->f, pic_arg);
1021         if (ret < 0)
1022             return ret;
1023
1024         pic->f->display_picture_number = display_picture_number;
1025         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1026     }
1027
1028     /* shift buffer entries */
1029     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1030         s->input_picture[i - 1] = s->input_picture[i];
1031
1032     s->input_picture[encoding_delay] = (Picture*) pic;
1033
1034     return 0;
1035 }
1036
1037 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1038 {
1039     int x, y, plane;
1040     int score = 0;
1041     int64_t score64 = 0;
1042
1043     for (plane = 0; plane < 3; plane++) {
1044         const int stride = p->f->linesize[plane];
1045         const int bw = plane ? 1 : 2;
1046         for (y = 0; y < s->mb_height * bw; y++) {
1047             for (x = 0; x < s->mb_width * bw; x++) {
1048                 int off = p->shared ? 0 : 16;
1049                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1050                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1051                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1052
1053                 switch (s->avctx->frame_skip_exp) {
1054                 case 0: score    =  FFMAX(score, v);          break;
1055                 case 1: score   += FFABS(v);                  break;
1056                 case 2: score   += v * v;                     break;
1057                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1058                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1059                 }
1060             }
1061         }
1062     }
1063
1064     if (score)
1065         score64 = score;
1066
1067     if (score64 < s->avctx->frame_skip_threshold)
1068         return 1;
1069     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1070         return 1;
1071     return 0;
1072 }
1073
1074 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1075 {
1076     AVPacket pkt = { 0 };
1077     int ret, got_output;
1078
1079     av_init_packet(&pkt);
1080     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1081     if (ret < 0)
1082         return ret;
1083
1084     ret = pkt.size;
1085     av_free_packet(&pkt);
1086     return ret;
1087 }
1088
1089 static int estimate_best_b_count(MpegEncContext *s)
1090 {
1091     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1092     AVCodecContext *c = avcodec_alloc_context3(NULL);
1093     const int scale = s->avctx->brd_scale;
1094     int i, j, out_size, p_lambda, b_lambda, lambda2;
1095     int64_t best_rd  = INT64_MAX;
1096     int best_b_count = -1;
1097
1098     assert(scale >= 0 && scale <= 3);
1099
1100     //emms_c();
1101     //s->next_picture_ptr->quality;
1102     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1103     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1104     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1105     if (!b_lambda) // FIXME we should do this somewhere else
1106         b_lambda = p_lambda;
1107     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1108                FF_LAMBDA_SHIFT;
1109
1110     c->width        = s->width  >> scale;
1111     c->height       = s->height >> scale;
1112     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1113     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1114     c->mb_decision  = s->avctx->mb_decision;
1115     c->me_cmp       = s->avctx->me_cmp;
1116     c->mb_cmp       = s->avctx->mb_cmp;
1117     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1118     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1119     c->time_base    = s->avctx->time_base;
1120     c->max_b_frames = s->max_b_frames;
1121
1122     if (avcodec_open2(c, codec, NULL) < 0)
1123         return -1;
1124
1125     for (i = 0; i < s->max_b_frames + 2; i++) {
1126         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1127                                                 s->next_picture_ptr;
1128
1129         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1130             pre_input = *pre_input_ptr;
1131
1132             if (!pre_input.shared && i) {
1133                 pre_input.f->data[0] += INPLACE_OFFSET;
1134                 pre_input.f->data[1] += INPLACE_OFFSET;
1135                 pre_input.f->data[2] += INPLACE_OFFSET;
1136             }
1137
1138             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1139                                  pre_input.f->data[0], pre_input.f->linesize[0],
1140                                  c->width,      c->height);
1141             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1142                                  pre_input.f->data[1], pre_input.f->linesize[1],
1143                                  c->width >> 1, c->height >> 1);
1144             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1145                                  pre_input.f->data[2], pre_input.f->linesize[2],
1146                                  c->width >> 1, c->height >> 1);
1147         }
1148     }
1149
1150     for (j = 0; j < s->max_b_frames + 1; j++) {
1151         int64_t rd = 0;
1152
1153         if (!s->input_picture[j])
1154             break;
1155
1156         c->error[0] = c->error[1] = c->error[2] = 0;
1157
1158         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1159         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1160
1161         out_size = encode_frame(c, s->tmp_frames[0]);
1162
1163         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1164
1165         for (i = 0; i < s->max_b_frames + 1; i++) {
1166             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1167
1168             s->tmp_frames[i + 1]->pict_type = is_p ?
1169                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1170             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1171
1172             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1173
1174             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1175         }
1176
1177         /* get the delayed frames */
1178         while (out_size) {
1179             out_size = encode_frame(c, NULL);
1180             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1181         }
1182
1183         rd += c->error[0] + c->error[1] + c->error[2];
1184
1185         if (rd < best_rd) {
1186             best_rd = rd;
1187             best_b_count = j;
1188         }
1189     }
1190
1191     avcodec_close(c);
1192     av_freep(&c);
1193
1194     return best_b_count;
1195 }
1196
1197 static int select_input_picture(MpegEncContext *s)
1198 {
1199     int i, ret;
1200
1201     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1202         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1203     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1204
1205     /* set next picture type & ordering */
1206     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1207         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1208             s->next_picture_ptr == NULL || s->intra_only) {
1209             s->reordered_input_picture[0] = s->input_picture[0];
1210             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1211             s->reordered_input_picture[0]->f->coded_picture_number =
1212                 s->coded_picture_number++;
1213         } else {
1214             int b_frames;
1215
1216             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1217                 if (s->picture_in_gop_number < s->gop_size &&
1218                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1219                     // FIXME check that te gop check above is +-1 correct
1220                     av_frame_unref(s->input_picture[0]->f);
1221
1222                     emms_c();
1223                     ff_vbv_update(s, 0);
1224
1225                     goto no_output_pic;
1226                 }
1227             }
1228
1229             if (s->flags & CODEC_FLAG_PASS2) {
1230                 for (i = 0; i < s->max_b_frames + 1; i++) {
1231                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1232
1233                     if (pict_num >= s->rc_context.num_entries)
1234                         break;
1235                     if (!s->input_picture[i]) {
1236                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1237                         break;
1238                     }
1239
1240                     s->input_picture[i]->f->pict_type =
1241                         s->rc_context.entry[pict_num].new_pict_type;
1242                 }
1243             }
1244
1245             if (s->avctx->b_frame_strategy == 0) {
1246                 b_frames = s->max_b_frames;
1247                 while (b_frames && !s->input_picture[b_frames])
1248                     b_frames--;
1249             } else if (s->avctx->b_frame_strategy == 1) {
1250                 for (i = 1; i < s->max_b_frames + 1; i++) {
1251                     if (s->input_picture[i] &&
1252                         s->input_picture[i]->b_frame_score == 0) {
1253                         s->input_picture[i]->b_frame_score =
1254                             get_intra_count(s,
1255                                             s->input_picture[i    ]->f->data[0],
1256                                             s->input_picture[i - 1]->f->data[0],
1257                                             s->linesize) + 1;
1258                     }
1259                 }
1260                 for (i = 0; i < s->max_b_frames + 1; i++) {
1261                     if (s->input_picture[i] == NULL ||
1262                         s->input_picture[i]->b_frame_score - 1 >
1263                             s->mb_num / s->avctx->b_sensitivity)
1264                         break;
1265                 }
1266
1267                 b_frames = FFMAX(0, i - 1);
1268
1269                 /* reset scores */
1270                 for (i = 0; i < b_frames + 1; i++) {
1271                     s->input_picture[i]->b_frame_score = 0;
1272                 }
1273             } else if (s->avctx->b_frame_strategy == 2) {
1274                 b_frames = estimate_best_b_count(s);
1275             } else {
1276                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1277                 b_frames = 0;
1278             }
1279
1280             emms_c();
1281
1282             for (i = b_frames - 1; i >= 0; i--) {
1283                 int type = s->input_picture[i]->f->pict_type;
1284                 if (type && type != AV_PICTURE_TYPE_B)
1285                     b_frames = i;
1286             }
1287             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1288                 b_frames == s->max_b_frames) {
1289                 av_log(s->avctx, AV_LOG_ERROR,
1290                        "warning, too many b frames in a row\n");
1291             }
1292
1293             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1294                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1295                     s->gop_size > s->picture_in_gop_number) {
1296                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1297                 } else {
1298                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1299                         b_frames = 0;
1300                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1301                 }
1302             }
1303
1304             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1305                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1306                 b_frames--;
1307
1308             s->reordered_input_picture[0] = s->input_picture[b_frames];
1309             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1310                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1311             s->reordered_input_picture[0]->f->coded_picture_number =
1312                 s->coded_picture_number++;
1313             for (i = 0; i < b_frames; i++) {
1314                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1315                 s->reordered_input_picture[i + 1]->f->pict_type =
1316                     AV_PICTURE_TYPE_B;
1317                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1318                     s->coded_picture_number++;
1319             }
1320         }
1321     }
1322 no_output_pic:
1323     if (s->reordered_input_picture[0]) {
1324         s->reordered_input_picture[0]->reference =
1325            s->reordered_input_picture[0]->f->pict_type !=
1326                AV_PICTURE_TYPE_B ? 3 : 0;
1327
1328         ff_mpeg_unref_picture(s, &s->new_picture);
1329         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1330             return ret;
1331
1332         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1333             // input is a shared pix, so we can't modifiy it -> alloc a new
1334             // one & ensure that the shared one is reuseable
1335
1336             Picture *pic;
1337             int i = ff_find_unused_picture(s, 0);
1338             if (i < 0)
1339                 return i;
1340             pic = &s->picture[i];
1341
1342             pic->reference = s->reordered_input_picture[0]->reference;
1343             if (ff_alloc_picture(s, pic, 0) < 0) {
1344                 return -1;
1345             }
1346
1347             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1348             if (ret < 0)
1349                 return ret;
1350
1351             /* mark us unused / free shared pic */
1352             av_frame_unref(s->reordered_input_picture[0]->f);
1353             s->reordered_input_picture[0]->shared = 0;
1354
1355             s->current_picture_ptr = pic;
1356         } else {
1357             // input is not a shared pix -> reuse buffer for current_pix
1358             s->current_picture_ptr = s->reordered_input_picture[0];
1359             for (i = 0; i < 4; i++) {
1360                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1361             }
1362         }
1363         ff_mpeg_unref_picture(s, &s->current_picture);
1364         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1365                                        s->current_picture_ptr)) < 0)
1366             return ret;
1367
1368         s->picture_number = s->new_picture.f->display_picture_number;
1369     } else {
1370         ff_mpeg_unref_picture(s, &s->new_picture);
1371     }
1372     return 0;
1373 }
1374
1375 static void frame_end(MpegEncContext *s)
1376 {
1377     int i;
1378
1379     if (s->unrestricted_mv &&
1380         s->current_picture.reference &&
1381         !s->intra_only) {
1382         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1383         int hshift = desc->log2_chroma_w;
1384         int vshift = desc->log2_chroma_h;
1385         s->dsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1386                           s->h_edge_pos, s->v_edge_pos,
1387                           EDGE_WIDTH, EDGE_WIDTH,
1388                           EDGE_TOP | EDGE_BOTTOM);
1389         s->dsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1390                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1391                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1392                           EDGE_TOP | EDGE_BOTTOM);
1393         s->dsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1394                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1395                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1396                           EDGE_TOP | EDGE_BOTTOM);
1397     }
1398
1399     emms_c();
1400
1401     s->last_pict_type                 = s->pict_type;
1402     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1403     if (s->pict_type!= AV_PICTURE_TYPE_B)
1404         s->last_non_b_pict_type = s->pict_type;
1405
1406     if (s->encoding) {
1407         /* release non-reference frames */
1408         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1409             if (!s->picture[i].reference)
1410                 ff_mpeg_unref_picture(s, &s->picture[i]);
1411         }
1412     }
1413
1414     s->avctx->coded_frame = s->current_picture_ptr->f;
1415
1416 }
1417
1418 static void update_noise_reduction(MpegEncContext *s)
1419 {
1420     int intra, i;
1421
1422     for (intra = 0; intra < 2; intra++) {
1423         if (s->dct_count[intra] > (1 << 16)) {
1424             for (i = 0; i < 64; i++) {
1425                 s->dct_error_sum[intra][i] >>= 1;
1426             }
1427             s->dct_count[intra] >>= 1;
1428         }
1429
1430         for (i = 0; i < 64; i++) {
1431             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1432                                        s->dct_count[intra] +
1433                                        s->dct_error_sum[intra][i] / 2) /
1434                                       (s->dct_error_sum[intra][i] + 1);
1435         }
1436     }
1437 }
1438
1439 static int frame_start(MpegEncContext *s)
1440 {
1441     int ret;
1442
1443     /* mark & release old frames */
1444     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1445         s->last_picture_ptr != s->next_picture_ptr &&
1446         s->last_picture_ptr->f->buf[0]) {
1447         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1448     }
1449
1450     s->current_picture_ptr->f->pict_type = s->pict_type;
1451     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1452
1453     ff_mpeg_unref_picture(s, &s->current_picture);
1454     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1455                                    s->current_picture_ptr)) < 0)
1456         return ret;
1457
1458     if (s->pict_type != AV_PICTURE_TYPE_B) {
1459         s->last_picture_ptr = s->next_picture_ptr;
1460         if (!s->droppable)
1461             s->next_picture_ptr = s->current_picture_ptr;
1462     }
1463
1464     if (s->last_picture_ptr) {
1465         ff_mpeg_unref_picture(s, &s->last_picture);
1466         if (s->last_picture_ptr->f->buf[0] &&
1467             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1468                                        s->last_picture_ptr)) < 0)
1469             return ret;
1470     }
1471     if (s->next_picture_ptr) {
1472         ff_mpeg_unref_picture(s, &s->next_picture);
1473         if (s->next_picture_ptr->f->buf[0] &&
1474             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1475                                        s->next_picture_ptr)) < 0)
1476             return ret;
1477     }
1478
1479     if (s->picture_structure!= PICT_FRAME) {
1480         int i;
1481         for (i = 0; i < 4; i++) {
1482             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1483                 s->current_picture.f->data[i] +=
1484                     s->current_picture.f->linesize[i];
1485             }
1486             s->current_picture.f->linesize[i] *= 2;
1487             s->last_picture.f->linesize[i]    *= 2;
1488             s->next_picture.f->linesize[i]    *= 2;
1489         }
1490     }
1491
1492     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1493         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1494         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1495     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1496         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1497         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1498     } else {
1499         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1500         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1501     }
1502
1503     if (s->dct_error_sum) {
1504         assert(s->avctx->noise_reduction && s->encoding);
1505         update_noise_reduction(s);
1506     }
1507
1508     return 0;
1509 }
1510
1511 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1512                           const AVFrame *pic_arg, int *got_packet)
1513 {
1514     MpegEncContext *s = avctx->priv_data;
1515     int i, stuffing_count, ret;
1516     int context_count = s->slice_context_count;
1517
1518     s->picture_in_gop_number++;
1519
1520     if (load_input_picture(s, pic_arg) < 0)
1521         return -1;
1522
1523     if (select_input_picture(s) < 0) {
1524         return -1;
1525     }
1526
1527     /* output? */
1528     if (s->new_picture.f->data[0]) {
1529         if (!pkt->data &&
1530             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1531             return ret;
1532         if (s->mb_info) {
1533             s->mb_info_ptr = av_packet_new_side_data(pkt,
1534                                  AV_PKT_DATA_H263_MB_INFO,
1535                                  s->mb_width*s->mb_height*12);
1536             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1537         }
1538
1539         for (i = 0; i < context_count; i++) {
1540             int start_y = s->thread_context[i]->start_mb_y;
1541             int   end_y = s->thread_context[i]->  end_mb_y;
1542             int h       = s->mb_height;
1543             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1544             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1545
1546             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1547         }
1548
1549         s->pict_type = s->new_picture.f->pict_type;
1550         //emms_c();
1551         ret = frame_start(s);
1552         if (ret < 0)
1553             return ret;
1554 vbv_retry:
1555         if (encode_picture(s, s->picture_number) < 0)
1556             return -1;
1557
1558         avctx->header_bits = s->header_bits;
1559         avctx->mv_bits     = s->mv_bits;
1560         avctx->misc_bits   = s->misc_bits;
1561         avctx->i_tex_bits  = s->i_tex_bits;
1562         avctx->p_tex_bits  = s->p_tex_bits;
1563         avctx->i_count     = s->i_count;
1564         // FIXME f/b_count in avctx
1565         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1566         avctx->skip_count  = s->skip_count;
1567
1568         frame_end(s);
1569
1570         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1571             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1572
1573         if (avctx->rc_buffer_size) {
1574             RateControlContext *rcc = &s->rc_context;
1575             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1576
1577             if (put_bits_count(&s->pb) > max_size &&
1578                 s->lambda < s->avctx->lmax) {
1579                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1580                                        (s->qscale + 1) / s->qscale);
1581                 if (s->adaptive_quant) {
1582                     int i;
1583                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1584                         s->lambda_table[i] =
1585                             FFMAX(s->lambda_table[i] + 1,
1586                                   s->lambda_table[i] * (s->qscale + 1) /
1587                                   s->qscale);
1588                 }
1589                 s->mb_skipped = 0;        // done in frame_start()
1590                 // done in encode_picture() so we must undo it
1591                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1592                     if (s->flipflop_rounding          ||
1593                         s->codec_id == AV_CODEC_ID_H263P ||
1594                         s->codec_id == AV_CODEC_ID_MPEG4)
1595                         s->no_rounding ^= 1;
1596                 }
1597                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1598                     s->time_base       = s->last_time_base;
1599                     s->last_non_b_time = s->time - s->pp_time;
1600                 }
1601                 for (i = 0; i < context_count; i++) {
1602                     PutBitContext *pb = &s->thread_context[i]->pb;
1603                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1604                 }
1605                 goto vbv_retry;
1606             }
1607
1608             assert(s->avctx->rc_max_rate);
1609         }
1610
1611         if (s->flags & CODEC_FLAG_PASS1)
1612             ff_write_pass1_stats(s);
1613
1614         for (i = 0; i < 4; i++) {
1615             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1616             avctx->error[i] += s->current_picture_ptr->f->error[i];
1617         }
1618
1619         if (s->flags & CODEC_FLAG_PASS1)
1620             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1621                    avctx->i_tex_bits + avctx->p_tex_bits ==
1622                        put_bits_count(&s->pb));
1623         flush_put_bits(&s->pb);
1624         s->frame_bits  = put_bits_count(&s->pb);
1625
1626         stuffing_count = ff_vbv_update(s, s->frame_bits);
1627         if (stuffing_count) {
1628             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1629                     stuffing_count + 50) {
1630                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1631                 return -1;
1632             }
1633
1634             switch (s->codec_id) {
1635             case AV_CODEC_ID_MPEG1VIDEO:
1636             case AV_CODEC_ID_MPEG2VIDEO:
1637                 while (stuffing_count--) {
1638                     put_bits(&s->pb, 8, 0);
1639                 }
1640             break;
1641             case AV_CODEC_ID_MPEG4:
1642                 put_bits(&s->pb, 16, 0);
1643                 put_bits(&s->pb, 16, 0x1C3);
1644                 stuffing_count -= 4;
1645                 while (stuffing_count--) {
1646                     put_bits(&s->pb, 8, 0xFF);
1647                 }
1648             break;
1649             default:
1650                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1651             }
1652             flush_put_bits(&s->pb);
1653             s->frame_bits  = put_bits_count(&s->pb);
1654         }
1655
1656         /* update mpeg1/2 vbv_delay for CBR */
1657         if (s->avctx->rc_max_rate                          &&
1658             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1659             s->out_format == FMT_MPEG1                     &&
1660             90000LL * (avctx->rc_buffer_size - 1) <=
1661                 s->avctx->rc_max_rate * 0xFFFFLL) {
1662             int vbv_delay, min_delay;
1663             double inbits  = s->avctx->rc_max_rate *
1664                              av_q2d(s->avctx->time_base);
1665             int    minbits = s->frame_bits - 8 *
1666                              (s->vbv_delay_ptr - s->pb.buf - 1);
1667             double bits    = s->rc_context.buffer_index + minbits - inbits;
1668
1669             if (bits < 0)
1670                 av_log(s->avctx, AV_LOG_ERROR,
1671                        "Internal error, negative bits\n");
1672
1673             assert(s->repeat_first_field == 0);
1674
1675             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1676             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1677                         s->avctx->rc_max_rate;
1678
1679             vbv_delay = FFMAX(vbv_delay, min_delay);
1680
1681             assert(vbv_delay < 0xFFFF);
1682
1683             s->vbv_delay_ptr[0] &= 0xF8;
1684             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1685             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1686             s->vbv_delay_ptr[2] &= 0x07;
1687             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1688             avctx->vbv_delay     = vbv_delay * 300;
1689         }
1690         s->total_bits     += s->frame_bits;
1691         avctx->frame_bits  = s->frame_bits;
1692
1693         pkt->pts = s->current_picture.f->pts;
1694         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1695             if (!s->current_picture.f->coded_picture_number)
1696                 pkt->dts = pkt->pts - s->dts_delta;
1697             else
1698                 pkt->dts = s->reordered_pts;
1699             s->reordered_pts = pkt->pts;
1700         } else
1701             pkt->dts = pkt->pts;
1702         if (s->current_picture.f->key_frame)
1703             pkt->flags |= AV_PKT_FLAG_KEY;
1704         if (s->mb_info)
1705             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1706     } else {
1707         s->frame_bits = 0;
1708     }
1709     assert((s->frame_bits & 7) == 0);
1710
1711     pkt->size = s->frame_bits / 8;
1712     *got_packet = !!pkt->size;
1713     return 0;
1714 }
1715
1716 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1717                                                 int n, int threshold)
1718 {
1719     static const char tab[64] = {
1720         3, 2, 2, 1, 1, 1, 1, 1,
1721         1, 1, 1, 1, 1, 1, 1, 1,
1722         1, 1, 1, 1, 1, 1, 1, 1,
1723         0, 0, 0, 0, 0, 0, 0, 0,
1724         0, 0, 0, 0, 0, 0, 0, 0,
1725         0, 0, 0, 0, 0, 0, 0, 0,
1726         0, 0, 0, 0, 0, 0, 0, 0,
1727         0, 0, 0, 0, 0, 0, 0, 0
1728     };
1729     int score = 0;
1730     int run = 0;
1731     int i;
1732     int16_t *block = s->block[n];
1733     const int last_index = s->block_last_index[n];
1734     int skip_dc;
1735
1736     if (threshold < 0) {
1737         skip_dc = 0;
1738         threshold = -threshold;
1739     } else
1740         skip_dc = 1;
1741
1742     /* Are all we could set to zero already zero? */
1743     if (last_index <= skip_dc - 1)
1744         return;
1745
1746     for (i = 0; i <= last_index; i++) {
1747         const int j = s->intra_scantable.permutated[i];
1748         const int level = FFABS(block[j]);
1749         if (level == 1) {
1750             if (skip_dc && i == 0)
1751                 continue;
1752             score += tab[run];
1753             run = 0;
1754         } else if (level > 1) {
1755             return;
1756         } else {
1757             run++;
1758         }
1759     }
1760     if (score >= threshold)
1761         return;
1762     for (i = skip_dc; i <= last_index; i++) {
1763         const int j = s->intra_scantable.permutated[i];
1764         block[j] = 0;
1765     }
1766     if (block[0])
1767         s->block_last_index[n] = 0;
1768     else
1769         s->block_last_index[n] = -1;
1770 }
1771
1772 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1773                                int last_index)
1774 {
1775     int i;
1776     const int maxlevel = s->max_qcoeff;
1777     const int minlevel = s->min_qcoeff;
1778     int overflow = 0;
1779
1780     if (s->mb_intra) {
1781         i = 1; // skip clipping of intra dc
1782     } else
1783         i = 0;
1784
1785     for (; i <= last_index; i++) {
1786         const int j = s->intra_scantable.permutated[i];
1787         int level = block[j];
1788
1789         if (level > maxlevel) {
1790             level = maxlevel;
1791             overflow++;
1792         } else if (level < minlevel) {
1793             level = minlevel;
1794             overflow++;
1795         }
1796
1797         block[j] = level;
1798     }
1799
1800     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1801         av_log(s->avctx, AV_LOG_INFO,
1802                "warning, clipping %d dct coefficients to %d..%d\n",
1803                overflow, minlevel, maxlevel);
1804 }
1805
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For every pixel the (up to) 3x3 neighbourhood, cropped to the 8x8 block,
 * is examined; with N samples, sum S and sum of squares Q the weight is
 * 36 * ff_sqrt(N * Q - S * S) / N.
 *
 * @param weight output table of 64 entries, stored row by row
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int pos;

    // FIXME optimize
    for (pos = 0; pos < 64; pos++) {
        const int col    = pos & 7;
        const int row    = pos >> 3;
        /* neighbourhood bounds, clamped to the block */
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);
        const int left   = FFMAX(col - 1, 0);
        const int right  = FFMIN(8, col + 2);
        int total   = 0;
        int squares = 0;
        int samples = 0;
        int r, c;

        for (r = top; r < bottom; r++) {
            for (c = left; c < right; c++) {
                const int v = ptr[c + r * stride];

                total   += v;
                squares += v * v;
                samples++;
            }
        }

        weight[pos] = (36 * ff_sqrt(samples * squares - total * total)) / samples;
    }
}
1829
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * The function optionally updates the quantizer for the macroblock, loads
 * the source pixels (intra) or the difference to the motion compensated
 * prediction (inter) into s->block[], quantizes every block that is not
 * skipped and finally hands the blocks to the codec specific macroblock
 * writer selected by s->codec_id.
 *
 * @param s               encoder context
 * @param motion_x        forwarded unchanged to the macroblock writer
 * @param motion_y        forwarded unchanged to the macroblock writer
 * @param mb_block_height number of chroma lines per macroblock used for
 *                        source addressing and edge emulation (the visible
 *                        caller passes 8 or 16)
 * @param mb_block_count  number of blocks in the macroblock (the visible
 *                        caller passes 6 or 8)
 */
1830 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1831                                                 int motion_x, int motion_y,
1832                                                 int mb_block_height,
1833                                                 int mb_block_count)
1834 {
1835     int16_t weight[8][64];
1836     int16_t orig[8][64];
1837     const int mb_x = s->mb_x;
1838     const int mb_y = s->mb_y;
1839     int i;
1840     int skip_dct[8];
1841     int dct_offset = s->linesize * 8; // default for progressive frames
1842     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1843     ptrdiff_t wrap_y, wrap_c;
1844
         /* every block starts out with the global skipdct setting */
1845     for (i = 0; i < mb_block_count; i++)
1846         skip_dct[i] = s->skipdct;
1847
         /* with adaptive quantization lambda comes from the per macroblock
          * table; unless QP_RD is active, qscale is taken from the picture's
          * qscale_table and dquant is derived from the previous qscale */
1848     if (s->adaptive_quant) {
1849         const int last_qp = s->qscale;
1850         const int mb_xy = mb_x + mb_y * s->mb_stride;
1851
1852         s->lambda = s->lambda_table[mb_xy];
1853         update_qscale(s);
1854
1855         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1856             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1857             s->dquant = s->qscale - last_qp;
1858
1859             if (s->out_format == FMT_H263) {
                     /* H.263 style output: limit the change to +-2 */
1860                 s->dquant = av_clip(s->dquant, -2, 2);
1861
1862                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
                         /* MPEG-4 inter macroblocks: no quantizer change for
                          * odd dquant or direct mode in B pictures, nor for
                          * 8x8 motion vectors */
1863                     if (!s->mb_intra) {
1864                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1865                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1866                                 s->dquant = 0;
1867                         }
1868                         if (s->mv_type == MV_TYPE_8X8)
1869                             s->dquant = 0;
1870                     }
1871                 }
1872             }
1873         }
1874         ff_set_qscale(s, last_qp + s->dquant);
1875     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1876         ff_set_qscale(s, s->qscale + s->dquant);
1877
         /* source pointers: luma is addressed in 16x16 units, chroma in
          * units of 8 columns by mb_block_height lines */
1878     wrap_y = s->linesize;
1879     wrap_c = s->uvlinesize;
1880     ptr_y  = s->new_picture.f->data[0] +
1881              (mb_y * 16 * wrap_y)              + mb_x * 16;
1882     ptr_cb = s->new_picture.f->data[1] +
1883              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1884     ptr_cr = s->new_picture.f->data[2] +
1885              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1886
         /* the macroblock reaches beyond the picture: read the source through
          * copies made by emulated_edge_mc() in edge_emu_buffer instead
          * (presumably with the picture edge replicated - confirm in vdsp).
          * NOTE(review): the chroma calls use mb_y * 8 and s->height >> 1
          * although the block height is mb_block_height; this looks 4:2:0
          * specific - confirm behaviour for 4:2:2 input. */
1887     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1888         uint8_t *ebuf = s->edge_emu_buffer + 32;
1889         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1890                                  wrap_y, wrap_y,
1891                                  16, 16, mb_x * 16, mb_y * 16,
1892                                  s->width, s->height);
1893         ptr_y = ebuf;
1894         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1895                                  wrap_c, wrap_c,
1896                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1897                                  s->width >> 1, s->height >> 1);
1898         ptr_cb = ebuf + 18 * wrap_y;
1899         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1900                                  wrap_c, wrap_c,
1901                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1902                                  s->width >> 1, s->height >> 1);
1903         ptr_cr = ebuf + 18 * wrap_y + 8;
1904     }
1905
1906     if (s->mb_intra) {
             /* intra: the blocks are filled straight from the source */
1907         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1908             int progressive_score, interlaced_score;
1909
                 /* frame/field DCT decision: the progressive score compares
                  * the upper and lower 8 lines (biased by -400), the
                  * interlaced score compares the even and odd lines */
1910             s->interlaced_dct = 0;
1911             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1912                                                     NULL, wrap_y, 8) +
1913                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1914                                                     NULL, wrap_y, 8) - 400;
1915
1916             if (progressive_score > 0) {
1917                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1918                                                        NULL, wrap_y * 2, 8) +
1919                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1920                                                        NULL, wrap_y * 2, 8);
1921                 if (progressive_score > interlaced_score) {
1922                     s->interlaced_dct = 1;
1923
                         /* field mode: the second block row starts one line
                          * down and every block uses twice the line stride */
1924                     dct_offset = wrap_y;
1925                     wrap_y <<= 1;
1926                     if (s->chroma_format == CHROMA_422)
1927                         wrap_c <<= 1;
1928                 }
1929             }
1930         }
1931
             /* the four luma blocks */
1932         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1933         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1934         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1935         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1936
             /* NOTE(review): in gray mode only blocks 4 and 5 are marked as
              * skipped; blocks 6 and 7 (mb_block_count == 8) keep the
              * skipdct default although they are not loaded - confirm */
1937         if (s->flags & CODEC_FLAG_GRAY) {
1938             skip_dct[4] = 1;
1939             skip_dct[5] = 1;
1940         } else {
1941             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1942             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1943             if (!s->chroma_y_shift) { /* 422 */
1944                 s->dsp.get_pixels(s->block[6],
1945                                   ptr_cb + (dct_offset >> 1), wrap_c);
1946                 s->dsp.get_pixels(s->block[7],
1947                                   ptr_cr + (dct_offset >> 1), wrap_c);
1948             }
1949         }
1950     } else {
             /* inter: build the prediction in s->dest[] and code the
              * difference between source and prediction */
1951         op_pixels_func (*op_pix)[4];
1952         qpel_mc_func (*op_qpix)[16];
1953         uint8_t *dest_y, *dest_cb, *dest_cr;
1954
1955         dest_y  = s->dest[0];
1956         dest_cb = s->dest[1];
1957         dest_cr = s->dest[2];
1958
             /* the no-rounding function tables are only used for non-B
              * pictures with no_rounding set */
1959         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1960             op_pix  = s->hdsp.put_pixels_tab;
1961             op_qpix = s->qdsp.put_qpel_pixels_tab;
1962         } else {
1963             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1964             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
1965         }
1966
             /* after a forward prediction the tables are switched to the avg
              * variants, so a following backward prediction is combined with
              * it rather than overwriting it */
1967         if (s->mv_dir & MV_DIR_FORWARD) {
1968             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1969                           s->last_picture.f->data,
1970                           op_pix, op_qpix);
1971             op_pix  = s->hdsp.avg_pixels_tab;
1972             op_qpix = s->qdsp.avg_qpel_pixels_tab;
1973         }
1974         if (s->mv_dir & MV_DIR_BACKWARD) {
1975             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1976                           s->next_picture.f->data,
1977                           op_pix, op_qpix);
1978         }
1979
1980         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1981             int progressive_score, interlaced_score;
1982
                 /* same frame/field decision as in the intra case, but the
                  * comparison is made between prediction and source */
1983             s->interlaced_dct = 0;
1984             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1985                                                     ptr_y,              wrap_y,
1986                                                     8) +
1987                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1988                                                     ptr_y + wrap_y * 8, wrap_y,
1989                                                     8) - 400;
1990
                 /* extra bias towards progressive for the VSSE metric */
1991             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1992                 progressive_score -= 400;
1993
1994             if (progressive_score > 0) {
1995                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1996                                                        ptr_y,
1997                                                        wrap_y * 2, 8) +
1998                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1999                                                        ptr_y + wrap_y,
2000                                                        wrap_y * 2, 8);
2001
2002                 if (progressive_score > interlaced_score) {
2003                     s->interlaced_dct = 1;
2004
2005                     dct_offset = wrap_y;
2006                     wrap_y <<= 1;
2007                     if (s->chroma_format == CHROMA_422)
2008                         wrap_c <<= 1;
2009                 }
2010             }
2011         }
2012
             /* residual of the four luma blocks */
2013         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2014         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2015         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2016                            dest_y + dct_offset, wrap_y);
2017         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2018                            dest_y + dct_offset + 8, wrap_y);
2019
2020         if (s->flags & CODEC_FLAG_GRAY) {
2021             skip_dct[4] = 1;
2022             skip_dct[5] = 1;
2023         } else {
2024             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2025             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2026             if (!s->chroma_y_shift) { /* 422 */
2027                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2028                                    dest_cb + (dct_offset >> 1), wrap_c);
2029                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2030                                    dest_cr + (dct_offset >> 1), wrap_c);
2031             }
2032         }
2033         /* pre quantization */
             /* when the stored mc_mb_var of this macroblock is below
              * 2 * qscale^2, every block whose sad[1] against the prediction
              * is below 20 * qscale is skipped */
2034         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2035                 2 * s->qscale * s->qscale) {
2036             // FIXME optimize
2037             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2038                               wrap_y, 8) < 20 * s->qscale)
2039                 skip_dct[0] = 1;
2040             if (s->dsp.sad[1](NULL, ptr_y + 8,
2041                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2042                 skip_dct[1] = 1;
2043             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2044                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2045                 skip_dct[2] = 1;
2046             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2047                               dest_y + dct_offset + 8,
2048                               wrap_y, 8) < 20 * s->qscale)
2049                 skip_dct[3] = 1;
2050             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2051                               wrap_c, 8) < 20 * s->qscale)
2052                 skip_dct[4] = 1;
2053             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2054                               wrap_c, 8) < 20 * s->qscale)
2055                 skip_dct[5] = 1;
2056             if (!s->chroma_y_shift) { /* 422 */
2057                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2058                                   dest_cb + (dct_offset >> 1),
2059                                   wrap_c, 8) < 20 * s->qscale)
2060                     skip_dct[6] = 1;
2061                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2062                                   dest_cr + (dct_offset >> 1),
2063                                   wrap_c, 8) < 20 * s->qscale)
2064                     skip_dct[7] = 1;
2065             }
2066         }
2067     }
2068
         /* noise shaping needs a weight table per coded block (computed from
          * the source pixels) and a copy of the blocks before quantization */
2069     if (s->quantizer_noise_shaping) {
2070         if (!skip_dct[0])
2071             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2072         if (!skip_dct[1])
2073             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2074         if (!skip_dct[2])
2075             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2076         if (!skip_dct[3])
2077             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2078         if (!skip_dct[4])
2079             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2080         if (!skip_dct[5])
2081             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2082         if (!s->chroma_y_shift) { /* 422 */
2083             if (!skip_dct[6])
2084                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2085                                   wrap_c);
2086             if (!skip_dct[7])
2087                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2088                                   wrap_c);
2089         }
             /* a single copy covers all blocks; assumes s->block[0] to
              * s->block[mb_block_count - 1] are contiguous - TODO confirm */
2090         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2091     }
2092
2093     /* DCT & quantize */
2094     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2095     {
2096         for (i = 0; i < mb_block_count; i++) {
2097             if (!skip_dct[i]) {
2098                 int overflow;
2099                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2100                 // FIXME we could decide to change to quantizer instead of
2101                 // clipping
2102                 // JS: I don't think that would be a good idea it could lower
2103                 //     quality instead of improve it. Just INTRADC clipping
2104                 //     deserves changes in quantizer
2105                 if (overflow)
2106                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2107             } else
                     /* skipped blocks are marked as having no coefficients */
2108                 s->block_last_index[i] = -1;
2109         }
             /* second pass over the coded blocks using the weights and the
              * saved unquantized data */
2110         if (s->quantizer_noise_shaping) {
2111             for (i = 0; i < mb_block_count; i++) {
2112                 if (!skip_dct[i]) {
2113                     s->block_last_index[i] =
2114                         dct_quantize_refine(s, s->block[i], weight[i],
2115                                             orig[i], i, s->qscale);
2116                 }
2117             }
2118         }
2119
             /* coefficient elimination is applied to inter macroblocks only,
              * with separate thresholds for luma (blocks 0-3) and chroma */
2120         if (s->luma_elim_threshold && !s->mb_intra)
2121             for (i = 0; i < 4; i++)
2122                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2123         if (s->chroma_elim_threshold && !s->mb_intra)
2124             for (i = 4; i < mb_block_count; i++)
2125                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2126
             /* with CBP_RD, blocks without coefficients get a fixed
              * coded_score of INT_MAX / 256 */
2127         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2128             for (i = 0; i < mb_block_count; i++) {
2129                 if (s->block_last_index[i] == -1)
2130                     s->coded_score[i] = INT_MAX / 256;
2131             }
2132         }
2133     }
2134
         /* gray intra macroblocks: chroma blocks 4 and 5 carry only a DC
          * value derived from 1024 and c_dc_scale (presumably mid-gray,
          * 128 * 8 - confirm) */
2135     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2136         s->block_last_index[4] =
2137         s->block_last_index[5] = 0;
2138         s->block[4][0] =
2139         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2140     }
2141
2142     // non c quantize code returns incorrect block_last_index FIXME
         /* recompute the last index by searching backwards from position 63
          * for the last nonzero coefficient in scan order */
2143     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2144         for (i = 0; i < mb_block_count; i++) {
2145             int j;
2146             if (s->block_last_index[i] > 0) {
2147                 for (j = 63; j > 0; j--) {
2148                     if (s->block[i][s->intra_scantable.permutated[j]])
2149                         break;
2150                 }
2151                 s->block_last_index[i] = j;
2152             }
2153         }
2154     }
2155
2156     /* huffman encode */
         /* each call is guarded by the CONFIG_* macro of its encoder;
          * MJPEG is the only writer that takes no motion vector */
2157     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2158     case AV_CODEC_ID_MPEG1VIDEO:
2159     case AV_CODEC_ID_MPEG2VIDEO:
2160         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2161             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2162         break;
2163     case AV_CODEC_ID_MPEG4:
2164         if (CONFIG_MPEG4_ENCODER)
2165             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2166         break;
2167     case AV_CODEC_ID_MSMPEG4V2:
2168     case AV_CODEC_ID_MSMPEG4V3:
2169     case AV_CODEC_ID_WMV1:
2170         if (CONFIG_MSMPEG4_ENCODER)
2171             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2172         break;
2173     case AV_CODEC_ID_WMV2:
2174         if (CONFIG_WMV2_ENCODER)
2175             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2176         break;
2177     case AV_CODEC_ID_H261:
2178         if (CONFIG_H261_ENCODER)
2179             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2180         break;
2181     case AV_CODEC_ID_H263:
2182     case AV_CODEC_ID_H263P:
2183     case AV_CODEC_ID_FLV1:
2184     case AV_CODEC_ID_RV10:
2185     case AV_CODEC_ID_RV20:
2186         if (CONFIG_H263_ENCODER)
2187             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2188         break;
2189     case AV_CODEC_ID_MJPEG:
2190         if (CONFIG_MJPEG_ENCODER)
2191             ff_mjpeg_encode_mb(s, s->block);
2192         break;
2193     default:
2194         assert(0);
2195     }
2196 }
2197
2198 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2199 {
2200     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2201     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2202 }
2203
2204 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2205     int i;
2206
2207     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2208
2209     /* mpeg1 */
2210     d->mb_skip_run= s->mb_skip_run;
2211     for(i=0; i<3; i++)
2212         d->last_dc[i] = s->last_dc[i];
2213
2214     /* statistics */
2215     d->mv_bits= s->mv_bits;
2216     d->i_tex_bits= s->i_tex_bits;
2217     d->p_tex_bits= s->p_tex_bits;
2218     d->i_count= s->i_count;
2219     d->f_count= s->f_count;
2220     d->b_count= s->b_count;
2221     d->skip_count= s->skip_count;
2222     d->misc_bits= s->misc_bits;
2223     d->last_bits= 0;
2224
2225     d->mb_skipped= 0;
2226     d->qscale= s->qscale;
2227     d->dquant= s->dquant;
2228
2229     d->esc3_level_length= s->esc3_level_length;
2230 }
2231
2232 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2233     int i;
2234
2235     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2236     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2237
2238     /* mpeg1 */
2239     d->mb_skip_run= s->mb_skip_run;
2240     for(i=0; i<3; i++)
2241         d->last_dc[i] = s->last_dc[i];
2242
2243     /* statistics */
2244     d->mv_bits= s->mv_bits;
2245     d->i_tex_bits= s->i_tex_bits;
2246     d->p_tex_bits= s->p_tex_bits;
2247     d->i_count= s->i_count;
2248     d->f_count= s->f_count;
2249     d->b_count= s->b_count;
2250     d->skip_count= s->skip_count;
2251     d->misc_bits= s->misc_bits;
2252
2253     d->mb_intra= s->mb_intra;
2254     d->mb_skipped= s->mb_skipped;
2255     d->mv_type= s->mv_type;
2256     d->mv_dir= s->mv_dir;
2257     d->pb= s->pb;
2258     if(s->data_partitioning){
2259         d->pb2= s->pb2;
2260         d->tex_pb= s->tex_pb;
2261     }
2262     d->block= s->block;
2263     for(i=0; i<8; i++)
2264         d->block_last_index[i]= s->block_last_index[i];
2265     d->interlaced_dct= s->interlaced_dct;
2266     d->qscale= s->qscale;
2267
2268     d->esc3_level_length= s->esc3_level_length;
2269 }
2270
2271 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2272                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2273                            int *dmin, int *next_block, int motion_x, int motion_y)
2274 {
2275     int score;
2276     uint8_t *dest_backup[3];
2277
2278     copy_context_before_encode(s, backup, type);
2279
2280     s->block= s->blocks[*next_block];
2281     s->pb= pb[*next_block];
2282     if(s->data_partitioning){
2283         s->pb2   = pb2   [*next_block];
2284         s->tex_pb= tex_pb[*next_block];
2285     }
2286
2287     if(*next_block){
2288         memcpy(dest_backup, s->dest, sizeof(s->dest));
2289         s->dest[0] = s->rd_scratchpad;
2290         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2291         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2292         assert(s->linesize >= 32); //FIXME
2293     }
2294
2295     encode_mb(s, motion_x, motion_y);
2296
2297     score= put_bits_count(&s->pb);
2298     if(s->data_partitioning){
2299         score+= put_bits_count(&s->pb2);
2300         score+= put_bits_count(&s->tex_pb);
2301     }
2302
2303     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2304         ff_MPV_decode_mb(s, s->block);
2305
2306         score *= s->lambda2;
2307         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2308     }
2309
2310     if(*next_block){
2311         memcpy(s->dest, dest_backup, sizeof(s->dest));
2312     }
2313
2314     if(score<*dmin){
2315         *dmin= score;
2316         *next_block^=1;
2317
2318         copy_context_after_encode(best, s, type);
2319     }
2320 }
2321
2322 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2323     uint32_t *sq = ff_square_tab + 256;
2324     int acc=0;
2325     int x,y;
2326
2327     if(w==16 && h==16)
2328         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2329     else if(w==8 && h==8)
2330         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2331
2332     for(y=0; y<h; y++){
2333         for(x=0; x<w; x++){
2334             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2335         }
2336     }
2337
2338     assert(acc>=0);
2339
2340     return acc;
2341 }
2342
2343 static int sse_mb(MpegEncContext *s){
2344     int w= 16;
2345     int h= 16;
2346
2347     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2348     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2349
2350     if(w==16 && h==16)
2351       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2352         return  s->dsp.nsse[0](s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2353                +s->dsp.nsse[1](s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2354                +s->dsp.nsse[1](s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2355       }else{
2356         return  s->dsp.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2357                +s->dsp.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2358                +s->dsp.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2359       }
2360     else
2361         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2362                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2363                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2364 }
2365
2366 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2367     MpegEncContext *s= *(void**)arg;
2368
2369
2370     s->me.pre_pass=1;
2371     s->me.dia_size= s->avctx->pre_dia_size;
2372     s->first_slice_line=1;
2373     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2374         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2375             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2376         }
2377         s->first_slice_line=0;
2378     }
2379
2380     s->me.pre_pass=0;
2381
2382     return 0;
2383 }
2384
2385 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2386     MpegEncContext *s= *(void**)arg;
2387
2388     s->me.dia_size= s->avctx->dia_size;
2389     s->first_slice_line=1;
2390     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2391         s->mb_x=0; //for block init below
2392         ff_init_block_index(s);
2393         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2394             s->block_index[0]+=2;
2395             s->block_index[1]+=2;
2396             s->block_index[2]+=2;
2397             s->block_index[3]+=2;
2398
2399             /* compute motion vector & mb_type and store in context */
2400             if(s->pict_type==AV_PICTURE_TYPE_B)
2401                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2402             else
2403                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2404         }
2405         s->first_slice_line=0;
2406     }
2407     return 0;
2408 }
2409
2410 static int mb_var_thread(AVCodecContext *c, void *arg){
2411     MpegEncContext *s= *(void**)arg;
2412     int mb_x, mb_y;
2413
2414     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2415         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2416             int xx = mb_x * 16;
2417             int yy = mb_y * 16;
2418             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2419             int varc;
2420             int sum = s->dsp.pix_sum(pix, s->linesize);
2421
2422             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2423
2424             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2425             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2426             s->me.mb_var_sum_temp    += varc;
2427         }
2428     }
2429     return 0;
2430 }
2431
2432 static void write_slice_end(MpegEncContext *s){
2433     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2434         if(s->partitioned_frame){
2435             ff_mpeg4_merge_partitions(s);
2436         }
2437
2438         ff_mpeg4_stuffing(&s->pb);
2439     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2440         ff_mjpeg_encode_stuffing(&s->pb);
2441     }
2442
2443     avpriv_align_put_bits(&s->pb);
2444     flush_put_bits(&s->pb);
2445
2446     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2447         s->misc_bits+= get_bits_diff(s);
2448 }
2449
2450 static void write_mb_info(MpegEncContext *s)
2451 {
2452     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2453     int offset = put_bits_count(&s->pb);
2454     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2455     int gobn = s->mb_y / s->gob_index;
2456     int pred_x, pred_y;
2457     if (CONFIG_H263_ENCODER)
2458         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2459     bytestream_put_le32(&ptr, offset);
2460     bytestream_put_byte(&ptr, s->qscale);
2461     bytestream_put_byte(&ptr, gobn);
2462     bytestream_put_le16(&ptr, mba);
2463     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2464     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2465     /* 4MV not implemented */
2466     bytestream_put_byte(&ptr, 0); /* hmv2 */
2467     bytestream_put_byte(&ptr, 0); /* vmv2 */
2468 }
2469
2470 static void update_mb_info(MpegEncContext *s, int startcode)
2471 {
2472     if (!s->mb_info)
2473         return;
2474     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2475         s->mb_info_size += 12;
2476         s->prev_mb_info = s->last_mb_info;
2477     }
2478     if (startcode) {
2479         s->prev_mb_info = put_bits_count(&s->pb)/8;
2480         /* This might have incremented mb_info_size above, and we return without
2481          * actually writing any info into that slot yet. But in that case,
2482          * this will be called again at the start of the after writing the
2483          * start code, actually writing the mb info. */
2484         return;
2485     }
2486
2487     s->last_mb_info = put_bits_count(&s->pb)/8;
2488     if (!s->mb_info_size)
2489         s->mb_info_size += 12;
2490     write_mb_info(s);
2491 }
2492
2493 static int encode_thread(AVCodecContext *c, void *arg){
2494     MpegEncContext *s= *(void**)arg;
2495     int mb_x, mb_y, pdif = 0;
2496     int chr_h= 16>>s->chroma_y_shift;
2497     int i, j;
2498     MpegEncContext best_s, backup_s;
2499     uint8_t bit_buf[2][MAX_MB_BYTES];
2500     uint8_t bit_buf2[2][MAX_MB_BYTES];
2501     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2502     PutBitContext pb[2], pb2[2], tex_pb[2];
2503
2504     for(i=0; i<2; i++){
2505         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2506         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2507         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2508     }
2509
2510     s->last_bits= put_bits_count(&s->pb);
2511     s->mv_bits=0;
2512     s->misc_bits=0;
2513     s->i_tex_bits=0;
2514     s->p_tex_bits=0;
2515     s->i_count=0;
2516     s->f_count=0;
2517     s->b_count=0;
2518     s->skip_count=0;
2519
2520     for(i=0; i<3; i++){
2521         /* init last dc values */
2522         /* note: quant matrix value (8) is implied here */
2523         s->last_dc[i] = 128 << s->intra_dc_precision;
2524
2525         s->current_picture.f->error[i] = 0;
2526     }
2527     s->mb_skip_run = 0;
2528     memset(s->last_mv, 0, sizeof(s->last_mv));
2529
2530     s->last_mv_dir = 0;
2531
2532     switch(s->codec_id){
2533     case AV_CODEC_ID_H263:
2534     case AV_CODEC_ID_H263P:
2535     case AV_CODEC_ID_FLV1:
2536         if (CONFIG_H263_ENCODER)
2537             s->gob_index = ff_h263_get_gob_height(s);
2538         break;
2539     case AV_CODEC_ID_MPEG4:
2540         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2541             ff_mpeg4_init_partitions(s);
2542         break;
2543     }
2544
2545     s->resync_mb_x=0;
2546     s->resync_mb_y=0;
2547     s->first_slice_line = 1;
2548     s->ptr_lastgob = s->pb.buf;
2549     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2550         s->mb_x=0;
2551         s->mb_y= mb_y;
2552
2553         ff_set_qscale(s, s->qscale);
2554         ff_init_block_index(s);
2555
2556         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2557             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2558             int mb_type= s->mb_type[xy];
2559 //            int d;
2560             int dmin= INT_MAX;
2561             int dir;
2562
2563             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2564                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2565                 return -1;
2566             }
2567             if(s->data_partitioning){
2568                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2569                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2570                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2571                     return -1;
2572                 }
2573             }
2574
2575             s->mb_x = mb_x;
2576             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2577             ff_update_block_index(s);
2578
2579             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2580                 ff_h261_reorder_mb_index(s);
2581                 xy= s->mb_y*s->mb_stride + s->mb_x;
2582                 mb_type= s->mb_type[xy];
2583             }
2584
2585             /* write gob / video packet header  */
2586             if(s->rtp_mode){
2587                 int current_packet_size, is_gob_start;
2588
2589                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2590
2591                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2592
2593                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2594
2595                 switch(s->codec_id){
2596                 case AV_CODEC_ID_H263:
2597                 case AV_CODEC_ID_H263P:
2598                     if(!s->h263_slice_structured)
2599                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2600                     break;
2601                 case AV_CODEC_ID_MPEG2VIDEO:
2602                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2603                 case AV_CODEC_ID_MPEG1VIDEO:
2604                     if(s->mb_skip_run) is_gob_start=0;
2605                     break;
2606                 }
2607
2608                 if(is_gob_start){
2609                     if(s->start_mb_y != mb_y || mb_x!=0){
2610                         write_slice_end(s);
2611
2612                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2613                             ff_mpeg4_init_partitions(s);
2614                         }
2615                     }
2616
2617                     assert((put_bits_count(&s->pb)&7) == 0);
2618                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2619
2620                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2621                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2622                         int d = 100 / s->error_rate;
2623                         if(r % d == 0){
2624                             current_packet_size=0;
2625                             s->pb.buf_ptr= s->ptr_lastgob;
2626                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2627                         }
2628                     }
2629
2630                     if (s->avctx->rtp_callback){
2631                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2632                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2633                     }
2634                     update_mb_info(s, 1);
2635
2636                     switch(s->codec_id){
2637                     case AV_CODEC_ID_MPEG4:
2638                         if (CONFIG_MPEG4_ENCODER) {
2639                             ff_mpeg4_encode_video_packet_header(s);
2640                             ff_mpeg4_clean_buffers(s);
2641                         }
2642                     break;
2643                     case AV_CODEC_ID_MPEG1VIDEO:
2644                     case AV_CODEC_ID_MPEG2VIDEO:
2645                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2646                             ff_mpeg1_encode_slice_header(s);
2647                             ff_mpeg1_clean_buffers(s);
2648                         }
2649                     break;
2650                     case AV_CODEC_ID_H263:
2651                     case AV_CODEC_ID_H263P:
2652                         if (CONFIG_H263_ENCODER)
2653                             ff_h263_encode_gob_header(s, mb_y);
2654                     break;
2655                     }
2656
2657                     if(s->flags&CODEC_FLAG_PASS1){
2658                         int bits= put_bits_count(&s->pb);
2659                         s->misc_bits+= bits - s->last_bits;
2660                         s->last_bits= bits;
2661                     }
2662
2663                     s->ptr_lastgob += current_packet_size;
2664                     s->first_slice_line=1;
2665                     s->resync_mb_x=mb_x;
2666                     s->resync_mb_y=mb_y;
2667                 }
2668             }
2669
2670             if(  (s->resync_mb_x   == s->mb_x)
2671                && s->resync_mb_y+1 == s->mb_y){
2672                 s->first_slice_line=0;
2673             }
2674
2675             s->mb_skipped=0;
2676             s->dquant=0; //only for QP_RD
2677
2678             update_mb_info(s, 0);
2679
2680             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2681                 int next_block=0;
2682                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2683
2684                 copy_context_before_encode(&backup_s, s, -1);
2685                 backup_s.pb= s->pb;
2686                 best_s.data_partitioning= s->data_partitioning;
2687                 best_s.partitioned_frame= s->partitioned_frame;
2688                 if(s->data_partitioning){
2689                     backup_s.pb2= s->pb2;
2690                     backup_s.tex_pb= s->tex_pb;
2691                 }
2692
2693                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2694                     s->mv_dir = MV_DIR_FORWARD;
2695                     s->mv_type = MV_TYPE_16X16;
2696                     s->mb_intra= 0;
2697                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2698                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2699                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2700                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2701                 }
2702                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2703                     s->mv_dir = MV_DIR_FORWARD;
2704                     s->mv_type = MV_TYPE_FIELD;
2705                     s->mb_intra= 0;
2706                     for(i=0; i<2; i++){
2707                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2708                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2709                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2710                     }
2711                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2712                                  &dmin, &next_block, 0, 0);
2713                 }
2714                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2715                     s->mv_dir = MV_DIR_FORWARD;
2716                     s->mv_type = MV_TYPE_16X16;
2717                     s->mb_intra= 0;
2718                     s->mv[0][0][0] = 0;
2719                     s->mv[0][0][1] = 0;
2720                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2721                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2722                 }
2723                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2724                     s->mv_dir = MV_DIR_FORWARD;
2725                     s->mv_type = MV_TYPE_8X8;
2726                     s->mb_intra= 0;
2727                     for(i=0; i<4; i++){
2728                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2729                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2730                     }
2731                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2732                                  &dmin, &next_block, 0, 0);
2733                 }
2734                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2735                     s->mv_dir = MV_DIR_FORWARD;
2736                     s->mv_type = MV_TYPE_16X16;
2737                     s->mb_intra= 0;
2738                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2739                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2740                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2741                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2742                 }
2743                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2744                     s->mv_dir = MV_DIR_BACKWARD;
2745                     s->mv_type = MV_TYPE_16X16;
2746                     s->mb_intra= 0;
2747                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2748                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2749                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2750                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2751                 }
2752                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2753                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2754                     s->mv_type = MV_TYPE_16X16;
2755                     s->mb_intra= 0;
2756                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2757                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2758                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2759                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2760                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2761                                  &dmin, &next_block, 0, 0);
2762                 }
2763                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2764                     s->mv_dir = MV_DIR_FORWARD;
2765                     s->mv_type = MV_TYPE_FIELD;
2766                     s->mb_intra= 0;
2767                     for(i=0; i<2; i++){
2768                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2769                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2770                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2771                     }
2772                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2773                                  &dmin, &next_block, 0, 0);
2774                 }
2775                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2776                     s->mv_dir = MV_DIR_BACKWARD;
2777                     s->mv_type = MV_TYPE_FIELD;
2778                     s->mb_intra= 0;
2779                     for(i=0; i<2; i++){
2780                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2781                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2782                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2783                     }
2784                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2785                                  &dmin, &next_block, 0, 0);
2786                 }
2787                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2788                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2789                     s->mv_type = MV_TYPE_FIELD;
2790                     s->mb_intra= 0;
2791                     for(dir=0; dir<2; dir++){
2792                         for(i=0; i<2; i++){
2793                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2794                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2795                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2796                         }
2797                     }
2798                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2799                                  &dmin, &next_block, 0, 0);
2800                 }
2801                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2802                     s->mv_dir = 0;
2803                     s->mv_type = MV_TYPE_16X16;
2804                     s->mb_intra= 1;
2805                     s->mv[0][0][0] = 0;
2806                     s->mv[0][0][1] = 0;
2807                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2808                                  &dmin, &next_block, 0, 0);
2809                     if(s->h263_pred || s->h263_aic){
2810                         if(best_s.mb_intra)
2811                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2812                         else
2813                             ff_clean_intra_table_entries(s); //old mode?
2814                     }
2815                 }
2816
2817                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2818                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2819                         const int last_qp= backup_s.qscale;
2820                         int qpi, qp, dc[6];
2821                         int16_t ac[6][16];
2822                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2823                         static const int dquant_tab[4]={-1,1,-2,2};
2824
2825                         assert(backup_s.dquant == 0);
2826
2827                         //FIXME intra
2828                         s->mv_dir= best_s.mv_dir;
2829                         s->mv_type = MV_TYPE_16X16;
2830                         s->mb_intra= best_s.mb_intra;
2831                         s->mv[0][0][0] = best_s.mv[0][0][0];
2832                         s->mv[0][0][1] = best_s.mv[0][0][1];
2833                         s->mv[1][0][0] = best_s.mv[1][0][0];
2834                         s->mv[1][0][1] = best_s.mv[1][0][1];
2835
2836                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2837                         for(; qpi<4; qpi++){
2838                             int dquant= dquant_tab[qpi];
2839                             qp= last_qp + dquant;
2840                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2841                                 continue;
2842                             backup_s.dquant= dquant;
2843                             if(s->mb_intra && s->dc_val[0]){
2844                                 for(i=0; i<6; i++){
2845                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2846                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2847                                 }
2848                             }
2849
2850                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2851                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2852                             if(best_s.qscale != qp){
2853                                 if(s->mb_intra && s->dc_val[0]){
2854                                     for(i=0; i<6; i++){
2855                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2856                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2857                                     }
2858                                 }
2859                             }
2860                         }
2861                     }
2862                 }
2863                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2864                     int mx= s->b_direct_mv_table[xy][0];
2865                     int my= s->b_direct_mv_table[xy][1];
2866
2867                     backup_s.dquant = 0;
2868                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2869                     s->mb_intra= 0;
2870                     ff_mpeg4_set_direct_mv(s, mx, my);
2871                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2872                                  &dmin, &next_block, mx, my);
2873                 }
2874                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2875                     backup_s.dquant = 0;
2876                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2877                     s->mb_intra= 0;
2878                     ff_mpeg4_set_direct_mv(s, 0, 0);
2879                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2880                                  &dmin, &next_block, 0, 0);
2881                 }
2882                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2883                     int coded=0;
2884                     for(i=0; i<6; i++)
2885                         coded |= s->block_last_index[i];
2886                     if(coded){
2887                         int mx,my;
2888                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2889                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2890                             mx=my=0; //FIXME find the one we actually used
2891                             ff_mpeg4_set_direct_mv(s, mx, my);
2892                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2893                             mx= s->mv[1][0][0];
2894                             my= s->mv[1][0][1];
2895                         }else{
2896                             mx= s->mv[0][0][0];
2897                             my= s->mv[0][0][1];
2898                         }
2899
2900                         s->mv_dir= best_s.mv_dir;
2901                         s->mv_type = best_s.mv_type;
2902                         s->mb_intra= 0;
2903 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2904                         s->mv[0][0][1] = best_s.mv[0][0][1];
2905                         s->mv[1][0][0] = best_s.mv[1][0][0];
2906                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2907                         backup_s.dquant= 0;
2908                         s->skipdct=1;
2909                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2910                                         &dmin, &next_block, mx, my);
2911                         s->skipdct=0;
2912                     }
2913                 }
2914
2915                 s->current_picture.qscale_table[xy] = best_s.qscale;
2916
2917                 copy_context_after_encode(s, &best_s, -1);
2918
2919                 pb_bits_count= put_bits_count(&s->pb);
2920                 flush_put_bits(&s->pb);
2921                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2922                 s->pb= backup_s.pb;
2923
2924                 if(s->data_partitioning){
2925                     pb2_bits_count= put_bits_count(&s->pb2);
2926                     flush_put_bits(&s->pb2);
2927                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2928                     s->pb2= backup_s.pb2;
2929
2930                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2931                     flush_put_bits(&s->tex_pb);
2932                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2933                     s->tex_pb= backup_s.tex_pb;
2934                 }
2935                 s->last_bits= put_bits_count(&s->pb);
2936
2937                 if (CONFIG_H263_ENCODER &&
2938                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2939                     ff_h263_update_motion_val(s);
2940
2941                 if(next_block==0){ //FIXME 16 vs linesize16
2942                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2943                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2944                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2945                 }
2946
2947                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2948                     ff_MPV_decode_mb(s, s->block);
2949             } else {
2950                 int motion_x = 0, motion_y = 0;
2951                 s->mv_type=MV_TYPE_16X16;
2952                 // only one MB-Type possible
2953
2954                 switch(mb_type){
2955                 case CANDIDATE_MB_TYPE_INTRA:
2956                     s->mv_dir = 0;
2957                     s->mb_intra= 1;
2958                     motion_x= s->mv[0][0][0] = 0;
2959                     motion_y= s->mv[0][0][1] = 0;
2960                     break;
2961                 case CANDIDATE_MB_TYPE_INTER:
2962                     s->mv_dir = MV_DIR_FORWARD;
2963                     s->mb_intra= 0;
2964                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2965                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2966                     break;
2967                 case CANDIDATE_MB_TYPE_INTER_I:
2968                     s->mv_dir = MV_DIR_FORWARD;
2969                     s->mv_type = MV_TYPE_FIELD;
2970                     s->mb_intra= 0;
2971                     for(i=0; i<2; i++){
2972                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2973                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2974                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2975                     }
2976                     break;
2977                 case CANDIDATE_MB_TYPE_INTER4V:
2978                     s->mv_dir = MV_DIR_FORWARD;
2979                     s->mv_type = MV_TYPE_8X8;
2980                     s->mb_intra= 0;
2981                     for(i=0; i<4; i++){
2982                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2983                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2984                     }
2985                     break;
2986                 case CANDIDATE_MB_TYPE_DIRECT:
2987                     if (CONFIG_MPEG4_ENCODER) {
2988                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2989                         s->mb_intra= 0;
2990                         motion_x=s->b_direct_mv_table[xy][0];
2991                         motion_y=s->b_direct_mv_table[xy][1];
2992                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2993                     }
2994                     break;
2995                 case CANDIDATE_MB_TYPE_DIRECT0:
2996                     if (CONFIG_MPEG4_ENCODER) {
2997                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2998                         s->mb_intra= 0;
2999                         ff_mpeg4_set_direct_mv(s, 0, 0);
3000                     }
3001                     break;
3002                 case CANDIDATE_MB_TYPE_BIDIR:
3003                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3004                     s->mb_intra= 0;
3005                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3006                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3007                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3008                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3009                     break;
3010                 case CANDIDATE_MB_TYPE_BACKWARD:
3011                     s->mv_dir = MV_DIR_BACKWARD;
3012                     s->mb_intra= 0;
3013                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3014                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3015                     break;
3016                 case CANDIDATE_MB_TYPE_FORWARD:
3017                     s->mv_dir = MV_DIR_FORWARD;
3018                     s->mb_intra= 0;
3019                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3020                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3021                     break;
3022                 case CANDIDATE_MB_TYPE_FORWARD_I:
3023                     s->mv_dir = MV_DIR_FORWARD;
3024                     s->mv_type = MV_TYPE_FIELD;
3025                     s->mb_intra= 0;
3026                     for(i=0; i<2; i++){
3027                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3028                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3029                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3030                     }
3031                     break;
3032                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3033                     s->mv_dir = MV_DIR_BACKWARD;
3034                     s->mv_type = MV_TYPE_FIELD;
3035                     s->mb_intra= 0;
3036                     for(i=0; i<2; i++){
3037                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3038                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3039                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3040                     }
3041                     break;
3042                 case CANDIDATE_MB_TYPE_BIDIR_I:
3043                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3044                     s->mv_type = MV_TYPE_FIELD;
3045                     s->mb_intra= 0;
3046                     for(dir=0; dir<2; dir++){
3047                         for(i=0; i<2; i++){
3048                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3049                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3050                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3051                         }
3052                     }
3053                     break;
3054                 default:
3055                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3056                 }
3057
3058                 encode_mb(s, motion_x, motion_y);
3059
3060                 // RAL: Update last macroblock type
3061                 s->last_mv_dir = s->mv_dir;
3062
3063                 if (CONFIG_H263_ENCODER &&
3064                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3065                     ff_h263_update_motion_val(s);
3066
3067                 ff_MPV_decode_mb(s, s->block);
3068             }
3069
3070             /* clean the MV table in IPS frames for direct mode in B frames */
3071             if(s->mb_intra /* && I,P,S_TYPE */){
3072                 s->p_mv_table[xy][0]=0;
3073                 s->p_mv_table[xy][1]=0;
3074             }
3075
3076             if(s->flags&CODEC_FLAG_PSNR){
3077                 int w= 16;
3078                 int h= 16;
3079
3080                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3081                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3082
3083                 s->current_picture.f->error[0] += sse(
3084                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3085                     s->dest[0], w, h, s->linesize);
3086                 s->current_picture.f->error[1] += sse(
3087                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3088                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3089                 s->current_picture.f->error[2] += sse(
3090                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3091                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3092             }
3093             if(s->loop_filter){
3094                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3095                     ff_h263_loop_filter(s);
3096             }
3097             av_dlog(s->avctx, "MB %d %d bits\n",
3098                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3099         }
3100     }
3101
3102     //not beautiful here but we must write it before flushing so it has to be here
3103     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3104         ff_msmpeg4_encode_ext_header(s);
3105
3106     write_slice_end(s);
3107
3108     /* Send the last GOB if RTP */
3109     if (s->avctx->rtp_callback) {
3110         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3111         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3112         /* Call the RTP callback to send the last GOB */
3113         emms_c();
3114         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3115     }
3116
3117     return 0;
3118 }
3119
/*
 * Fold one accumulator field of a slice context into the main context:
 * adds src->field to dst->field and then resets src->field to 0.
 * Pointers named dst and src must be in scope where the macro is expanded.
 *
 * The body is wrapped in do { } while (0) so that both statements behave as
 * a single statement, e.g. when MERGE() is the unbraced body of an if or for.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3121 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3122     MERGE(me.scene_change_score);
3123     MERGE(me.mc_mb_var_sum_temp);
3124     MERGE(me.mb_var_sum_temp);
3125 }
3126
3127 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3128     int i;
3129
3130     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3131     MERGE(dct_count[1]);
3132     MERGE(mv_bits);
3133     MERGE(i_tex_bits);
3134     MERGE(p_tex_bits);
3135     MERGE(i_count);
3136     MERGE(f_count);
3137     MERGE(b_count);
3138     MERGE(skip_count);
3139     MERGE(misc_bits);
3140     MERGE(er.error_count);
3141     MERGE(padding_bug_score);
3142     MERGE(current_picture.f->error[0]);
3143     MERGE(current_picture.f->error[1]);
3144     MERGE(current_picture.f->error[2]);
3145
3146     if(dst->avctx->noise_reduction){
3147         for(i=0; i<64; i++){
3148             MERGE(dct_error_sum[0][i]);
3149             MERGE(dct_error_sum[1][i]);
3150         }
3151     }
3152
3153     assert(put_bits_count(&src->pb) % 8 ==0);
3154     assert(put_bits_count(&dst->pb) % 8 ==0);
3155     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3156     flush_put_bits(&dst->pb);
3157 }
3158
3159 static int estimate_qp(MpegEncContext *s, int dry_run){
3160     if (s->next_lambda){
3161         s->current_picture_ptr->f->quality =
3162         s->current_picture.f->quality = s->next_lambda;
3163         if(!dry_run) s->next_lambda= 0;
3164     } else if (!s->fixed_qscale) {
3165         s->current_picture_ptr->f->quality =
3166         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3167         if (s->current_picture.f->quality < 0)
3168             return -1;
3169     }
3170
3171     if(s->adaptive_quant){
3172         switch(s->codec_id){
3173         case AV_CODEC_ID_MPEG4:
3174             if (CONFIG_MPEG4_ENCODER)
3175                 ff_clean_mpeg4_qscales(s);
3176             break;
3177         case AV_CODEC_ID_H263:
3178         case AV_CODEC_ID_H263P:
3179         case AV_CODEC_ID_FLV1:
3180             if (CONFIG_H263_ENCODER)
3181                 ff_clean_h263_qscales(s);
3182             break;
3183         default:
3184             ff_init_qscale_tab(s);
3185         }
3186
3187         s->lambda= s->lambda_table[0];
3188         //FIXME broken
3189     }else
3190         s->lambda = s->current_picture.f->quality;
3191     update_qscale(s);
3192     return 0;
3193 }
3194
3195 /* must be called before writing the header */
3196 static void set_frame_distances(MpegEncContext * s){
3197     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3198     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3199
3200     if(s->pict_type==AV_PICTURE_TYPE_B){
3201         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3202         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3203     }else{
3204         s->pp_time= s->time - s->last_non_b_time;
3205         s->last_non_b_time= s->time;
3206         assert(s->picture_number==0 || s->pp_time > 0);
3207     }
3208 }
3209
/**
 * Encode the current picture.
 *
 * Sets up timing and rounding state, runs motion estimation (or, for
 * I-frames, the spatial complexity pass) on all slice contexts, chooses
 * f_code/b_code, settles the quantizer, writes the picture header for the
 * output format and finally runs encode_thread on every slice context and
 * merges the results back into s.
 *
 * @param s              main encoder context
 * @param picture_number stored in s->picture_number and handed to the
 *                       picture header writers
 * @return 0 on success, a negative value if estimate_qp(), ff_init_me() or
 *         ff_update_duplicate_context() fails
 */
3210 static int encode_picture(MpegEncContext *s, int picture_number)
3211 {
3212     int i, ret;
3213     int bits;
3214     int context_count = s->slice_context_count;
3215
3216     s->picture_number = picture_number;
3217
3218     /* Reset the average MB variance */
3219     s->me.mb_var_sum_temp    =
3220     s->me.mc_mb_var_sum_temp = 0;
3221
3222     /* we need to initialize some time vars before we can encode b-frames */
3223     // RAL: Condition added for MPEG1VIDEO
3224     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3225         set_frame_distances(s);
3226     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3227         ff_set_mpeg4_time(s);
3228
3229     s->me.scene_change_score=0;
3230
3231 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3232
         /* I-frames set no_rounding to a fixed value; other non-B frames
          * toggle it for the codecs/flag tested below */
3233     if(s->pict_type==AV_PICTURE_TYPE_I){
3234         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3235         else                        s->no_rounding=0;
3236     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3237         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3238             s->no_rounding ^= 1;
3239     }
3240
         /* initial lambda: in pass 2 from a dry-run quantizer estimate,
          * otherwise (unless CODEC_FLAG_QSCALE is set) the last lambda stored
          * for the matching picture type */
3241     if(s->flags & CODEC_FLAG_PASS2){
3242         if (estimate_qp(s,1) < 0)
3243             return -1;
3244         ff_get_2pass_fcode(s);
3245     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3246         if(s->pict_type==AV_PICTURE_TYPE_B)
3247             s->lambda= s->last_lambda_for[s->pict_type];
3248         else
3249             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3250         update_qscale(s);
3251     }
3252
3253     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* context 0 is skipped here; presumably thread_context[0] is s
          * itself -- TODO confirm */
3254     for(i=1; i<context_count; i++){
3255         ret = ff_update_duplicate_context(s->thread_context[i], s);
3256         if (ret < 0)
3257             return ret;
3258     }
3259
3260     if(ff_init_me(s)<0)
3261         return -1;
3262
3263     /* Estimate motion for every MB */
3264     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3265         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3266         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3267         if (s->pict_type != AV_PICTURE_TYPE_B) {
3268             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3269                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3270             }
3271         }
3272
3273         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3274     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3275         /* I-Frame */
3276         for(i=0; i<s->mb_stride*s->mb_height; i++)
3277             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3278
3279         if(!s->fixed_qscale){
3280             /* finding spatial complexity for I-frame rate control */
3281             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3282         }
3283     }
         /* sum the statistics gathered by the other slice contexts into s */
3284     for(i=1; i<context_count; i++){
3285         merge_context_after_me(s, s->thread_context[i]);
3286     }
3287     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3288     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3289     emms_c();
3290
         /* a P-frame whose scene change score exceeds the threshold is
          * re-typed as an I-frame with every macroblock marked intra */
3291     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3292         s->pict_type= AV_PICTURE_TYPE_I;
3293         for(i=0; i<s->mb_stride*s->mb_height; i++)
3294             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3295         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3296                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3297     }
3298
         /* choose f_code (and b_code for B-frames) from the estimated vectors,
          * then run ff_fix_long_mvs() over each vector table with that code;
          * all of this is skipped when umvplus is set */
3299     if(!s->umvplus){
3300         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3301             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3302
3303             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3304                 int a,b;
3305                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3306                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3307                 s->f_code= FFMAX3(s->f_code, a, b);
3308             }
3309
3310             ff_fix_long_p_mvs(s);
3311             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3312             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3313                 int j;
3314                 for(i=0; i<2; i++){
3315                     for(j=0; j<2; j++)
3316                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3317                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3318                 }
3319             }
3320         }
3321
3322         if(s->pict_type==AV_PICTURE_TYPE_B){
3323             int a, b;
3324
                 /* f_code covers the forward tables, b_code the backward ones */
3325             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3326             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3327             s->f_code = FFMAX(a, b);
3328
3329             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3330             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3331             s->b_code = FFMAX(a, b);
3332
3333             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3334             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3335             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3336             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3337             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3338                 int dir, j;
3339                 for(dir=0; dir<2; dir++){
3340                     for(i=0; i<2; i++){
3341                         for(j=0; j<2; j++){
3342                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3343                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3344                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3345                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3346                         }
3347                     }
3348                 }
3349             }
3350         }
3351     }
3352
         /* final (non dry-run) quantizer decision */
3353     if (estimate_qp(s, 0) < 0)
3354         return -1;
3355
3356     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3357         s->qscale= 3; //reduce clipping problems
3358
3359     if (s->out_format == FMT_MJPEG) {
3360         /* for mjpeg, we do include qscale in the matrix */
3361         for(i=1;i<64;i++){
3362             int j= s->dsp.idct_permutation[i];
3363
3364             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3365         }
3366         s->y_dc_scale_table=
3367         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3368         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3369         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3370                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrix, so use the fixed value 8 from here on */
3371         s->qscale= 8;
3372     }
3373
3374     //FIXME var duplication
3375     s->current_picture_ptr->f->key_frame =
3376     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3377     s->current_picture_ptr->f->pict_type =
3378     s->current_picture.f->pict_type = s->pict_type;
3379
3380     if (s->current_picture.f->key_frame)
3381         s->picture_in_gop_number=0;
3382
         /* remember the bit position so that header_bits can be derived once
          * the picture header has been written */
3383     s->last_bits= put_bits_count(&s->pb);
3384     switch(s->out_format) {
3385     case FMT_MJPEG:
3386         if (CONFIG_MJPEG_ENCODER)
3387             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3388                                            s->intra_matrix);
3389         break;
3390     case FMT_H261:
3391         if (CONFIG_H261_ENCODER)
3392             ff_h261_encode_picture_header(s, picture_number);
3393         break;
3394     case FMT_H263:
3395         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3396             ff_wmv2_encode_picture_header(s, picture_number);
3397         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3398             ff_msmpeg4_encode_picture_header(s, picture_number);
3399         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3400             ff_mpeg4_encode_picture_header(s, picture_number);
3401         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3402             ff_rv10_encode_picture_header(s, picture_number);
3403         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3404             ff_rv20_encode_picture_header(s, picture_number);
3405         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3406             ff_flv_encode_picture_header(s, picture_number);
3407         else if (CONFIG_H263_ENCODER)
3408             ff_h263_encode_picture_header(s, picture_number);
3409         break;
3410     case FMT_MPEG1:
3411         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3412             ff_mpeg1_encode_picture_header(s, picture_number);
3413         break;
3414     default:
3415         assert(0);
3416     }
3417     bits= put_bits_count(&s->pb);
3418     s->header_bits= bits - s->last_bits;
3419
         /* hand the post-ME state to the other slice contexts, encode all
          * slices, then merge their counters and bitstreams back into s */
3420     for(i=1; i<context_count; i++){
3421         update_duplicate_context_after_me(s->thread_context[i], s);
3422     }
3423     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3424     for(i=1; i<context_count; i++){
3425         merge_context_after_encode(s, s->thread_context[i]);
3426     }
3427     emms_c();
3428     return 0;
3429 }
3430
3431 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3432     const int intra= s->mb_intra;
3433     int i;
3434
3435     s->dct_count[intra]++;
3436
3437     for(i=0; i<64; i++){
3438         int level= block[i];
3439
3440         if(level){
3441             if(level>0){
3442                 s->dct_error_sum[intra][i] += level;
3443                 level -= s->dct_offset[intra][i];
3444                 if(level<0) level=0;
3445             }else{
3446                 s->dct_error_sum[intra][i] -= level;
3447                 level += s->dct_offset[intra][i];
3448                 if(level>0) level=0;
3449             }
3450             block[i]= level;
3451         }
3452     }
3453 }
3454
/**
 * Forward DCT plus rate-distortion ("trellis") quantization of one 8x8 block.
 *
 * The block is transformed in place with s->dsp.fdct (and denoised when
 * s->dct_error_sum is set). For every scan position up to the last
 * coefficient that does not quantize to zero, up to two candidate levels are
 * collected. A dynamic-programming pass then chooses, per position, the
 * (run, level) pair with the lowest distortion + lambda * code length,
 * reaching back only to a pruned list of "survivor" positions. The chosen
 * levels are finally written back into block through the permuted scantable.
 *
 * @param s        encoder context
 * @param block    8x8 block; overwritten with the quantized coefficients
 * @param n        block index: for intra blocks n < 4 selects y_dc_scale,
 *                 otherwise c_dc_scale; s->coded_score[n] receives the score
 *                 of the chosen coding
 * @param qscale   quantizer scale, used to index the quantization matrices
 * @param overflow set to non-zero when a candidate level may exceed
 *                 s->max_qcoeff
 * @return scan index of the last non-zero coefficient; this is below the
 *         start index (0 for intra, -1 for inter) when no coefficient from
 *         the start index on is coded
 */
3455 static int dct_quantize_trellis_c(MpegEncContext *s,
3456                                   int16_t *block, int n,
3457                                   int qscale, int *overflow){
3458     const int *qmat;
3459     const uint8_t *scantable= s->intra_scantable.scantable;
3460     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3461     int max=0;
3462     unsigned int threshold1, threshold2;
3463     int bias=0;
3464     int run_tab[65];
3465     int level_tab[65];
3466     int score_tab[65];
3467     int survivor[65];
3468     int survivor_count;
3469     int last_run=0;
3470     int last_level=0;
3471     int last_score= 0;
3472     int last_i;
3473     int coeff[2][64];
3474     int coeff_count[64];
3475     int qmul, qadd, start_i, last_non_zero, i, dc;
3476     const int esc_length= s->ac_esc_length;
3477     uint8_t * length;
3478     uint8_t * last_length;
3479     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3480
3481     s->dsp.fdct (block);
3482
3483     if(s->dct_error_sum)
3484         s->denoise_dct(s, block);
         /* dequantization terms for the FMT_H263 reconstruction below; the
          * MPEG-1 path shifts its result left by 3 instead */
3485     qmul= qscale*16;
3486     qadd= ((qscale-1)|1)*8;
3487
3488     if (s->mb_intra) {
3489         int q;
3490         if (!s->h263_aic) {
3491             if (n < 4)
3492                 q = s->y_dc_scale;
3493             else
3494                 q = s->c_dc_scale;
3495             q = q << 3;
3496         } else{
3497             /* For AIC we skip quant/dequant of INTRADC */
3498             q = 1 << 3;
3499             qadd=0;
3500         }
3501
3502         /* note: block[0] is assumed to be positive */
3503         block[0] = (block[0] + (q >> 1)) / q;
             /* DC is done; the trellis only covers scan positions 1..63 */
3504         start_i = 1;
3505         last_non_zero = 0;
3506         qmat = s->q_intra_matrix[qscale];
3507         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3508             bias= 1<<(QMAT_SHIFT-1);
3509         length     = s->intra_ac_vlc_length;
3510         last_length= s->intra_ac_vlc_last_length;
3511     } else {
3512         start_i = 0;
3513         last_non_zero = -1;
3514         qmat = s->q_inter_matrix[qscale];
3515         length     = s->inter_ac_vlc_length;
3516         last_length= s->inter_ac_vlc_last_length;
3517     }
3518     last_i= start_i;
3519
         /* a product "level" quantizes to zero iff
          * -threshold1 <= level <= threshold1; the unsigned comparison
          * (unsigned)(level+threshold1) > threshold2 tests both bounds at once */
3520     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3521     threshold2= (threshold1<<1);
3522
         /* find the last scan position that does not quantize to zero */
3523     for(i=63; i>=start_i; i--) {
3524         const int j = scantable[i];
3525         int level = block[j] * qmat[j];
3526
3527         if(((unsigned)(level+threshold1))>threshold2){
3528             last_non_zero = i;
3529             break;
3530         }
3531     }
3532
         /* collect the candidate levels per position: the quantized level and
          * the one next to it towards zero (dropped when that would be 0);
          * positions that quantize to zero get the single candidate +1 or -1 */
3533     for(i=start_i; i<=last_non_zero; i++) {
3534         const int j = scantable[i];
3535         int level = block[j] * qmat[j];
3536
3537 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3538 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3539         if(((unsigned)(level+threshold1))>threshold2){
3540             if(level>0){
3541                 level= (bias + level)>>QMAT_SHIFT;
3542                 coeff[0][i]= level;
3543                 coeff[1][i]= level-1;
3544 //                coeff[2][k]= level-2;
3545             }else{
3546                 level= (bias - level)>>QMAT_SHIFT;
3547                 coeff[0][i]= -level;
3548                 coeff[1][i]= -level+1;
3549 //                coeff[2][k]= -level+2;
3550             }
3551             coeff_count[i]= FFMIN(level, 2);
3552             assert(coeff_count[i]);
3553             max |=level;
3554         }else{
3555             coeff[0][i]= (level>>31)|1;
3556             coeff_count[i]= 1;
3557         }
3558     }
3559
3560     *overflow= s->max_qcoeff < max; //overflow might have happened
3561
         /* nothing from start_i on is non-zero: clear the block and stop */
3562     if(last_non_zero < start_i){
3563         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3564         return last_non_zero;
3565     }
3566
         /* score_tab[i] holds the best score for coding everything before
          * position i; survivor[] lists the positions still worth using as
          * the end of the previous run */
3567     score_tab[start_i]= 0;
3568     survivor[0]= start_i;
3569     survivor_count= 1;
3570
3571     for(i=start_i; i<=last_non_zero; i++){
3572         int level_index, j, zero_distortion;
3573         int dct_coeff= FFABS(block[ scantable[i] ]);
3574         int best_score=256*256*256*120;
3575
3576         if (s->dsp.fdct == ff_fdct_ifast)
3577             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3578         zero_distortion= dct_coeff*dct_coeff;
3579
3580         for(level_index=0; level_index < coeff_count[i]; level_index++){
3581             int distortion;
3582             int level= coeff[level_index][i];
3583             const int alevel= FFABS(level);
3584             int unquant_coeff;
3585
3586             assert(level);
3587
                 /* reconstruct the value a decoder would get for this level */
3588             if(s->out_format == FMT_H263){
3589                 unquant_coeff= alevel*qmul + qadd;
3590             }else{ //MPEG1
3591                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3592                 if(s->mb_intra){
3593                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3594                         unquant_coeff =   (unquant_coeff - 1) | 1;
3595                 }else{
3596                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3597                         unquant_coeff =   (unquant_coeff - 1) | 1;
3598                 }
3599                 unquant_coeff<<= 3;
3600             }
3601
                 /* squared error relative to coding this coefficient as zero */
3602             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* levels in -64..63 (0..127 after the offset) are costed via
                  * the length tables, anything else as an escape code */
3603             level+=64;
3604             if((level&(~127)) == 0){
3605                 for(j=survivor_count-1; j>=0; j--){
3606                     int run= i - survivor[j];
3607                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3608                     score += score_tab[i-run];
3609
3610                     if(score < best_score){
3611                         best_score= score;
3612                         run_tab[i+1]= run;
3613                         level_tab[i+1]= level-64;
3614                     }
3615                 }
3616
                     /* for FMT_H263 the final coefficient is costed with
                      * last_length, so the best end point is tracked here */
3617                 if(s->out_format == FMT_H263){
3618                     for(j=survivor_count-1; j>=0; j--){
3619                         int run= i - survivor[j];
3620                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3621                         score += score_tab[i-run];
3622                         if(score < last_score){
3623                             last_score= score;
3624                             last_run= run;
3625                             last_level= level-64;
3626                             last_i= i+1;
3627                         }
3628                     }
3629                 }
3630             }else{
3631                 distortion += esc_length*lambda;
3632                 for(j=survivor_count-1; j>=0; j--){
3633                     int run= i - survivor[j];
3634                     int score= distortion + score_tab[i-run];
3635
3636                     if(score < best_score){
3637                         best_score= score;
3638                         run_tab[i+1]= run;
3639                         level_tab[i+1]= level-64;
3640                     }
3641                 }
3642
3643                 if(s->out_format == FMT_H263){
3644                   for(j=survivor_count-1; j>=0; j--){
3645                         int run= i - survivor[j];
3646                         int score= distortion + score_tab[i-run];
3647                         if(score < last_score){
3648                             last_score= score;
3649                             last_run= run;
3650                             last_level= level-64;
3651                             last_i= i+1;
3652                         }
3653                     }
3654                 }
3655             }
3656         }
3657
3658         score_tab[i+1]= best_score;
3659
             /* drop survivors from the end of the list until one scores no
              * worse than the new best (plus a lambda of slack when
              * last_non_zero > 27), then append position i+1 */
3660         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3661         if(last_non_zero <= 27){
3662             for(; survivor_count; survivor_count--){
3663                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3664                     break;
3665             }
3666         }else{
3667             for(; survivor_count; survivor_count--){
3668                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3669                     break;
3670             }
3671         }
3672
3673         survivor[ survivor_count++ ]= i+1;
3674     }
3675
         /* other formats pick the end position afterwards, charging
          * 2*lambda for every end position except 0 */
3676     if(s->out_format != FMT_H263){
3677         last_score= 256*256*256*120;
3678         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3679             int score= score_tab[i];
3680             if(i) score += lambda*2; //FIXME exacter?
3681
3682             if(score < last_score){
3683                 last_score= score;
3684                 last_i= i;
3685                 last_level= level_tab[i];
3686                 last_run= run_tab[i];
3687             }
3688         }
3689     }
3690
3691     s->coded_score[n] = last_score;
3692
3693     dc= FFABS(block[0]);
3694     last_non_zero= last_i - 1;
3695     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3696
3697     if(last_non_zero < start_i)
3698         return last_non_zero;
3699
         /* only the first coefficient of an inter block is left: choose again
          * between its candidate levels and coding nothing at all */
3700     if(last_non_zero == 0 && start_i == 0){
3701         int best_level= 0;
3702         int best_score= dc * dc;
3703
3704         for(i=0; i<coeff_count[0]; i++){
3705             int level= coeff[i][0];
3706             int alevel= FFABS(level);
3707             int unquant_coeff, score, distortion;
3708
3709             if(s->out_format == FMT_H263){
3710                     unquant_coeff= (alevel*qmul + qadd)>>3;
3711             }else{ //MPEG1
3712                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3713                     unquant_coeff =   (unquant_coeff - 1) | 1;
3714             }
3715             unquant_coeff = (unquant_coeff + 4) >> 3;
3716             unquant_coeff<<= 3 + 3;
3717
3718             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3719             level+=64;
3720             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3721             else                    score= distortion + esc_length*lambda;
3722
3723             if(score < best_score){
3724                 best_score= score;
3725                 best_level= level - 64;
3726             }
3727         }
3728         block[0]= best_level;
3729         s->coded_score[n] = best_score - dc*dc;
3730         if(best_level == 0) return -1;
3731         else                return last_non_zero;
3732     }
3733
         /* store the last level, then follow run_tab/level_tab backwards to
          * store the remaining chosen levels */
3734     i= last_i;
3735     assert(last_level);
3736
3737     block[ perm_scantable[last_non_zero] ]= last_level;
3738     i -= last_run + 1;
3739
3740     for(; i>start_i; i -= run_tab[i] + 1){
3741         block[ perm_scantable[i-1] ]= level_tab[i];
3742     }
3743
3744     return last_non_zero;
3745 }
3746
3747 //#define REFINE_STATS 1
3748 static int16_t basis[64][64];
3749
3750 static void build_basis(uint8_t *perm){
3751     int i, j, x, y;
3752     emms_c();
3753     for(i=0; i<8; i++){
3754         for(j=0; j<8; j++){
3755             for(y=0; y<8; y++){
3756                 for(x=0; x<8; x++){
3757                     double s= 0.25*(1<<BASIS_SHIFT);
3758                     int index= 8*i + j;
3759                     int perm_index= perm[index];
3760                     if(i==0) s*= sqrt(0.5);
3761                     if(j==0) s*= sqrt(0.5);
3762                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3763                 }
3764             }
3765         }
3766     }
3767 }
3768
3769 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3770                         int16_t *block, int16_t *weight, int16_t *orig,
3771                         int n, int qscale){
3772     int16_t rem[64];
3773     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3774     const uint8_t *scantable= s->intra_scantable.scantable;
3775     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3776 //    unsigned int threshold1, threshold2;
3777 //    int bias=0;
3778     int run_tab[65];
3779     int prev_run=0;
3780     int prev_level=0;
3781     int qmul, qadd, start_i, last_non_zero, i, dc;
3782     uint8_t * length;
3783     uint8_t * last_length;
3784     int lambda;
3785     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3786 #ifdef REFINE_STATS
3787 static int count=0;
3788 static int after_last=0;
3789 static int to_zero=0;
3790 static int from_zero=0;
3791 static int raise=0;
3792 static int lower=0;
3793 static int messed_sign=0;
3794 #endif
3795
3796     if(basis[0][0] == 0)
3797         build_basis(s->dsp.idct_permutation);
3798
3799     qmul= qscale*2;
3800     qadd= (qscale-1)|1;
3801     if (s->mb_intra) {
3802         if (!s->h263_aic) {
3803             if (n < 4)
3804                 q = s->y_dc_scale;
3805             else
3806                 q = s->c_dc_scale;
3807         } else{
3808             /* For AIC we skip quant/dequant of INTRADC */
3809             q = 1;
3810             qadd=0;
3811         }
3812         q <<= RECON_SHIFT-3;
3813         /* note: block[0] is assumed to be positive */
3814         dc= block[0]*q;
3815 //        block[0] = (block[0] + (q >> 1)) / q;
3816         start_i = 1;
3817 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3818 //            bias= 1<<(QMAT_SHIFT-1);
3819         length     = s->intra_ac_vlc_length;
3820         last_length= s->intra_ac_vlc_last_length;
3821     } else {
3822         dc= 0;
3823         start_i = 0;
3824         length     = s->inter_ac_vlc_length;
3825         last_length= s->inter_ac_vlc_last_length;
3826     }
3827     last_non_zero = s->block_last_index[n];
3828
3829 #ifdef REFINE_STATS
3830 {START_TIMER
3831 #endif
3832     dc += (1<<(RECON_SHIFT-1));
3833     for(i=0; i<64; i++){
3834         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3835     }
3836 #ifdef REFINE_STATS
3837 STOP_TIMER("memset rem[]")}
3838 #endif
3839     sum=0;
3840     for(i=0; i<64; i++){
3841         int one= 36;
3842         int qns=4;
3843         int w;
3844
3845         w= FFABS(weight[i]) + qns*one;
3846         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3847
3848         weight[i] = w;
3849 //        w=weight[i] = (63*qns + (w/2)) / w;
3850
3851         assert(w>0);
3852         assert(w<(1<<6));
3853         sum += w*w;
3854     }
3855     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3856 #ifdef REFINE_STATS
3857 {START_TIMER
3858 #endif
3859     run=0;
3860     rle_index=0;
3861     for(i=start_i; i<=last_non_zero; i++){
3862         int j= perm_scantable[i];
3863         const int level= block[j];
3864         int coeff;
3865
3866         if(level){
3867             if(level<0) coeff= qmul*level - qadd;
3868             else        coeff= qmul*level + qadd;
3869             run_tab[rle_index++]=run;
3870             run=0;
3871
3872             s->dsp.add_8x8basis(rem, basis[j], coeff);
3873         }else{
3874             run++;
3875         }
3876     }
3877 #ifdef REFINE_STATS
3878 if(last_non_zero>0){
3879 STOP_TIMER("init rem[]")
3880 }
3881 }
3882
3883 {START_TIMER
3884 #endif
3885     for(;;){
3886         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3887         int best_coeff=0;
3888         int best_change=0;
3889         int run2, best_unquant_change=0, analyze_gradient;
3890 #ifdef REFINE_STATS
3891 {START_TIMER
3892 #endif
3893         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3894
3895         if(analyze_gradient){
3896 #ifdef REFINE_STATS
3897 {START_TIMER
3898 #endif
3899             for(i=0; i<64; i++){
3900                 int w= weight[i];
3901
3902                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3903             }
3904 #ifdef REFINE_STATS
3905 STOP_TIMER("rem*w*w")}
3906 {START_TIMER
3907 #endif
3908             s->dsp.fdct(d1);
3909 #ifdef REFINE_STATS
3910 STOP_TIMER("dct")}
3911 #endif
3912         }
3913
3914         if(start_i){
3915             const int level= block[0];
3916             int change, old_coeff;
3917
3918             assert(s->mb_intra);
3919
3920             old_coeff= q*level;
3921
3922             for(change=-1; change<=1; change+=2){
3923                 int new_level= level + change;
3924                 int score, new_coeff;
3925
3926                 new_coeff= q*new_level;
3927                 if(new_coeff >= 2048 || new_coeff < 0)
3928                     continue;
3929
3930                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3931                 if(score<best_score){
3932                     best_score= score;
3933                     best_coeff= 0;
3934                     best_change= change;
3935                     best_unquant_change= new_coeff - old_coeff;
3936                 }
3937             }
3938         }
3939
3940         run=0;
3941         rle_index=0;
3942         run2= run_tab[rle_index++];
3943         prev_level=0;
3944         prev_run=0;
3945
3946         for(i=start_i; i<64; i++){
3947             int j= perm_scantable[i];
3948             const int level= block[j];
3949             int change, old_coeff;
3950
3951             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3952                 break;
3953
3954             if(level){
3955                 if(level<0) old_coeff= qmul*level - qadd;
3956                 else        old_coeff= qmul*level + qadd;
3957                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3958             }else{
3959                 old_coeff=0;
3960                 run2--;
3961                 assert(run2>=0 || i >= last_non_zero );
3962             }
3963
3964             for(change=-1; change<=1; change+=2){
3965                 int new_level= level + change;
3966                 int score, new_coeff, unquant_change;
3967
3968                 score=0;
3969                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3970                    continue;
3971
3972                 if(new_level){
3973                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3974                     else            new_coeff= qmul*new_level + qadd;
3975                     if(new_coeff >= 2048 || new_coeff <= -2048)
3976                         continue;
3977                     //FIXME check for overflow
3978
3979                     if(level){
3980                         if(level < 63 && level > -63){
3981                             if(i < last_non_zero)
3982                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3983                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3984                             else
3985                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3986                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3987                         }
3988                     }else{
3989                         assert(FFABS(new_level)==1);
3990
3991                         if(analyze_gradient){
3992                             int g= d1[ scantable[i] ];
3993                             if(g && (g^new_level) >= 0)
3994                                 continue;
3995                         }
3996
3997                         if(i < last_non_zero){
3998                             int next_i= i + run2 + 1;
3999                             int next_level= block[ perm_scantable[next_i] ] + 64;
4000
4001                             if(next_level&(~127))
4002                                 next_level= 0;
4003
4004                             if(next_i < last_non_zero)
4005                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4006                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4007                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4008                             else
4009                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4010                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4011                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4012                         }else{
4013                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4014                             if(prev_level){
4015                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4016                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4017                             }
4018                         }
4019                     }
4020                 }else{
4021                     new_coeff=0;
4022                     assert(FFABS(level)==1);
4023
4024                     if(i < last_non_zero){
4025                         int next_i= i + run2 + 1;
4026                         int next_level= block[ perm_scantable[next_i] ] + 64;
4027
4028                         if(next_level&(~127))
4029                             next_level= 0;
4030
4031                         if(next_i < last_non_zero)
4032                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4033                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4034                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4035                         else
4036                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4037                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4038                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4039                     }else{
4040                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4041                         if(prev_level){
4042                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4043                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4044                         }
4045                     }
4046                 }
4047
4048                 score *= lambda;
4049
4050                 unquant_change= new_coeff - old_coeff;
4051                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4052
4053                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4054                 if(score<best_score){
4055                     best_score= score;
4056                     best_coeff= i;
4057                     best_change= change;
4058                     best_unquant_change= unquant_change;
4059                 }
4060             }
4061             if(level){
4062                 prev_level= level + 64;
4063                 if(prev_level&(~127))
4064                     prev_level= 0;
4065                 prev_run= run;
4066                 run=0;
4067             }else{
4068                 run++;
4069             }
4070         }
4071 #ifdef REFINE_STATS
4072 STOP_TIMER("iterative step")}
4073 #endif
4074
4075         if(best_change){
4076             int j= perm_scantable[ best_coeff ];
4077
4078             block[j] += best_change;
4079
4080             if(best_coeff > last_non_zero){
4081                 last_non_zero= best_coeff;
4082                 assert(block[j]);
4083 #ifdef REFINE_STATS
4084 after_last++;
4085 #endif
4086             }else{
4087 #ifdef REFINE_STATS
4088 if(block[j]){
4089     if(block[j] - best_change){
4090         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4091             raise++;
4092         }else{
4093             lower++;
4094         }
4095     }else{
4096         from_zero++;
4097     }
4098 }else{
4099     to_zero++;
4100 }
4101 #endif
4102                 for(; last_non_zero>=start_i; last_non_zero--){
4103                     if(block[perm_scantable[last_non_zero]])
4104                         break;
4105                 }
4106             }
4107 #ifdef REFINE_STATS
4108 count++;
4109 if(256*256*256*64 % count == 0){
4110     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4111 }
4112 #endif
4113             run=0;
4114             rle_index=0;
4115             for(i=start_i; i<=last_non_zero; i++){
4116                 int j= perm_scantable[i];
4117                 const int level= block[j];
4118
4119                  if(level){
4120                      run_tab[rle_index++]=run;
4121                      run=0;
4122                  }else{
4123                      run++;
4124                  }
4125             }
4126
4127             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4128         }else{
4129             break;
4130         }
4131     }
4132 #ifdef REFINE_STATS
4133 if(last_non_zero>0){
4134 STOP_TIMER("iterative search")
4135 }
4136 }
4137 #endif
4138
4139     return last_non_zero;
4140 }
4141
4142 int ff_dct_quantize_c(MpegEncContext *s,
4143                         int16_t *block, int n,
4144                         int qscale, int *overflow)
4145 {
4146     int i, j, level, last_non_zero, q, start_i;
4147     const int *qmat;
4148     const uint8_t *scantable= s->intra_scantable.scantable;
4149     int bias;
4150     int max=0;
4151     unsigned int threshold1, threshold2;
4152
4153     s->dsp.fdct (block);
4154
4155     if(s->dct_error_sum)
4156         s->denoise_dct(s, block);
4157
4158     if (s->mb_intra) {
4159         if (!s->h263_aic) {
4160             if (n < 4)
4161                 q = s->y_dc_scale;
4162             else
4163                 q = s->c_dc_scale;
4164             q = q << 3;
4165         } else
4166             /* For AIC we skip quant/dequant of INTRADC */
4167             q = 1 << 3;
4168
4169         /* note: block[0] is assumed to be positive */
4170         block[0] = (block[0] + (q >> 1)) / q;
4171         start_i = 1;
4172         last_non_zero = 0;
4173         qmat = s->q_intra_matrix[qscale];
4174         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4175     } else {
4176         start_i = 0;
4177         last_non_zero = -1;
4178         qmat = s->q_inter_matrix[qscale];
4179         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4180     }
4181     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4182     threshold2= (threshold1<<1);
4183     for(i=63;i>=start_i;i--) {
4184         j = scantable[i];
4185         level = block[j] * qmat[j];
4186
4187         if(((unsigned)(level+threshold1))>threshold2){
4188             last_non_zero = i;
4189             break;
4190         }else{
4191             block[j]=0;
4192         }
4193     }
4194     for(i=start_i; i<=last_non_zero; i++) {
4195         j = scantable[i];
4196         level = block[j] * qmat[j];
4197
4198 //        if(   bias+level >= (1<<QMAT_SHIFT)
4199 //           || bias-level >= (1<<QMAT_SHIFT)){
4200         if(((unsigned)(level+threshold1))>threshold2){
4201             if(level>0){
4202                 level= (bias + level)>>QMAT_SHIFT;
4203                 block[j]= level;
4204             }else{
4205                 level= (bias - level)>>QMAT_SHIFT;
4206                 block[j]= -level;
4207             }
4208             max |=level;
4209         }else{
4210             block[j]=0;
4211         }
4212     }
4213     *overflow= s->max_qcoeff < max; //overflow might have happened
4214
4215     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4216     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4217         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4218
4219     return last_non_zero;
4220 }
4221
/* Byte offset of an MpegEncContext field, for use in AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags shared by the video encoder options below; parenthesized so
 * the expansion stays a single operand inside any larger expression. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4224 static const AVOption h263_options[] = {
4225     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4226     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4227     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4228     FF_MPV_COMMON_OPTS
4229     { NULL },
4230 };
4231
4232 static const AVClass h263_class = {
4233     .class_name = "H.263 encoder",
4234     .item_name  = av_default_item_name,
4235     .option     = h263_options,
4236     .version    = LIBAVUTIL_VERSION_INT,
4237 };
4238
4239 AVCodec ff_h263_encoder = {
4240     .name           = "h263",
4241     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4242     .type           = AVMEDIA_TYPE_VIDEO,
4243     .id             = AV_CODEC_ID_H263,
4244     .priv_data_size = sizeof(MpegEncContext),
4245     .init           = ff_MPV_encode_init,
4246     .encode2        = ff_MPV_encode_picture,
4247     .close          = ff_MPV_encode_end,
4248     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4249     .priv_class     = &h263_class,
4250 };
4251
4252 static const AVOption h263p_options[] = {
4253     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4254     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4255     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4256     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4257     FF_MPV_COMMON_OPTS
4258     { NULL },
4259 };
4260 static const AVClass h263p_class = {
4261     .class_name = "H.263p encoder",
4262     .item_name  = av_default_item_name,
4263     .option     = h263p_options,
4264     .version    = LIBAVUTIL_VERSION_INT,
4265 };
4266
4267 AVCodec ff_h263p_encoder = {
4268     .name           = "h263p",
4269     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4270     .type           = AVMEDIA_TYPE_VIDEO,
4271     .id             = AV_CODEC_ID_H263P,
4272     .priv_data_size = sizeof(MpegEncContext),
4273     .init           = ff_MPV_encode_init,
4274     .encode2        = ff_MPV_encode_picture,
4275     .close          = ff_MPV_encode_end,
4276     .capabilities   = CODEC_CAP_SLICE_THREADS,
4277     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4278     .priv_class     = &h263p_class,
4279 };
4280
4281 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4282
4283 AVCodec ff_msmpeg4v2_encoder = {
4284     .name           = "msmpeg4v2",
4285     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4286     .type           = AVMEDIA_TYPE_VIDEO,
4287     .id             = AV_CODEC_ID_MSMPEG4V2,
4288     .priv_data_size = sizeof(MpegEncContext),
4289     .init           = ff_MPV_encode_init,
4290     .encode2        = ff_MPV_encode_picture,
4291     .close          = ff_MPV_encode_end,
4292     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4293     .priv_class     = &msmpeg4v2_class,
4294 };
4295
4296 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4297
4298 AVCodec ff_msmpeg4v3_encoder = {
4299     .name           = "msmpeg4",
4300     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4301     .type           = AVMEDIA_TYPE_VIDEO,
4302     .id             = AV_CODEC_ID_MSMPEG4V3,
4303     .priv_data_size = sizeof(MpegEncContext),
4304     .init           = ff_MPV_encode_init,
4305     .encode2        = ff_MPV_encode_picture,
4306     .close          = ff_MPV_encode_end,
4307     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4308     .priv_class     = &msmpeg4v3_class,
4309 };
4310
4311 FF_MPV_GENERIC_CLASS(wmv1)
4312
4313 AVCodec ff_wmv1_encoder = {
4314     .name           = "wmv1",
4315     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4316     .type           = AVMEDIA_TYPE_VIDEO,
4317     .id             = AV_CODEC_ID_WMV1,
4318     .priv_data_size = sizeof(MpegEncContext),
4319     .init           = ff_MPV_encode_init,
4320     .encode2        = ff_MPV_encode_picture,
4321     .close          = ff_MPV_encode_end,
4322     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4323     .priv_class     = &wmv1_class,
4324 };