git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of Libav.
   9  *
  10  * Libav is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * Libav is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with Libav; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "dsputil.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "h261.h"
  44 #include "h263.h"
  45 #include "mathops.h"
  46 #include "mpegutils.h"
  47 #include "mjpegenc.h"
  48 #include "msmpeg4.h"
  49 #include "faandct.h"
  50 #include "thread.h"
  51 #include "aandcttab.h"
  52 #include "flv.h"
  53 #include "mpeg4video.h"
  54 #include "internal.h"
  55 #include "bytestream.h"
  56 #include <limits.h>
  57
/* Forward declarations of file-local helpers. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* Default table installed as s->me.mv_penalty by MPV_encode_defaults(). */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
/* Default table installed as s->fcode_tab by MPV_encode_defaults(), which
 * sets the 32 entries starting at index MAX_MV - 16 to 1. */
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

/* Option table made of FF_MPV_COMMON_OPTS followed by the terminating
 * NULL entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
  71
  72 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
  73                        uint16_t (*qmat16)[2][64],
  74                        const uint16_t *quant_matrix,
  75                        int bias, int qmin, int qmax, int intra)
  76 {
  77     int qscale;
  78     int shift = 0;
  79
  80     for (qscale = qmin; qscale <= qmax; qscale++) {
  81         int i;
  82         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
  83             dsp->fdct == ff_jpeg_fdct_islow_10 ||
  84             dsp->fdct == ff_faandct) {
  85             for (i = 0; i < 64; i++) {
  86                 const int j = dsp->idct_permutation[i];
  87                 /* 16 <= qscale * quant_matrix[i] <= 7905
  88                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
  89                  *             19952 <=              x  <= 249205026
  90                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
  91                  *           3444240 >= (1 << 36) / (x) >= 275 */
  92
  93                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
  94                                         (qscale * quant_matrix[j]));
  95             }
  96         } else if (dsp->fdct == ff_fdct_ifast) {
  97             for (i = 0; i < 64; i++) {
  98                 const int j = dsp->idct_permutation[i];
  99                 /* 16 <= qscale * quant_matrix[i] <= 7905
 100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 101                  *             19952 <=              x  <= 249205026
 102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 103                  *           3444240 >= (1 << 36) / (x) >= 275 */
 104
 105                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
 106                                         (ff_aanscales[i] * qscale *
 107                                          quant_matrix[j]));
 108             }
 109         } else {
 110             for (i = 0; i < 64; i++) {
 111                 const int j = dsp->idct_permutation[i];
 112                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 113                  * Assume x = qscale * quant_matrix[i]
 114                  * So             16 <=              x  <= 7905
 115                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 116                  * so          32768 >= (1 << 19) / (x) >= 67 */
 117                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
 118                                         (qscale * quant_matrix[j]));
 119                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 120                 //                    (qscale * quant_matrix[i]);
 121                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
 122                                        (qscale * quant_matrix[j]);
 123
 124                 if (qmat16[qscale][0][i] == 0 ||
 125                     qmat16[qscale][0][i] == 128 * 256)
 126                     qmat16[qscale][0][i] = 128 * 256 - 1;
 127                 qmat16[qscale][1][i] =
 128                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 129                                 qmat16[qscale][0][i]);
 130             }
 131         }
 132
 133         for (i = intra; i < 64; i++) {
 134             int64_t max = 8191;
 135             if (dsp->fdct == ff_fdct_ifast) {
 136                 max = (8191LL * ff_aanscales[i]) >> 14;
 137             }
 138             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 139                 shift++;
 140             }
 141         }
 142     }
 143     if (shift) {
 144         av_log(NULL, AV_LOG_INFO,
 145                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 146                QMAT_SHIFT - shift);
 147     }
 148 }
 149
 150 static inline void update_qscale(MpegEncContext *s)
 151 {
 152     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 153                 (FF_LAMBDA_SHIFT + 7);
 154     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 155
 156     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 157                  FF_LAMBDA_SHIFT;
 158 }
 159
 160 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 161 {
 162     int i;
 163
 164     if (matrix) {
 165         put_bits(pb, 1, 1);
 166         for (i = 0; i < 64; i++) {
 167             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 168         }
 169     } else
 170         put_bits(pb, 1, 0);
 171 }
 172
 173 /**
 174  * init s->current_picture.qscale_table from s->lambda_table
 175  */
 176 void ff_init_qscale_tab(MpegEncContext *s)
 177 {
 178     int8_t * const qscale_table = s->current_picture.qscale_table;
 179     int i;
 180
 181     for (i = 0; i < s->mb_num; i++) {
 182         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 183         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 184         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 185                                                   s->avctx->qmax);
 186     }
 187 }
 188
 189 static void update_duplicate_context_after_me(MpegEncContext *dst,
 190                                               MpegEncContext *src)
 191 {
 192 #define COPY(a) dst->a= src->a
 193     COPY(pict_type);
 194     COPY(current_picture);
 195     COPY(f_code);
 196     COPY(b_code);
 197     COPY(qscale);
 198     COPY(lambda);
 199     COPY(lambda2);
 200     COPY(picture_in_gop_number);
 201     COPY(gop_picture_number);
 202     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 203     COPY(progressive_frame);    // FIXME don't set in encode_header
 204     COPY(partitioned_frame);    // FIXME don't set in encode_header
 205 #undef COPY
 206 }
 207
 208 /**
 209  * Set the given MpegEncContext to defaults for encoding.
 210  * the changed fields will not depend upon the prior state of the MpegEncContext.
 211  */
 212 static void MPV_encode_defaults(MpegEncContext *s)
 213 {
 214     int i;
 215     ff_MPV_common_defaults(s);
 216
 217     for (i = -16; i < 16; i++) {
 218         default_fcode_tab[i + MAX_MV] = 1;
 219     }
 220     s->me.mv_penalty = default_mv_penalty;
 221     s->fcode_tab     = default_fcode_tab;
 222
 223     s->input_picture_number  = 0;
 224     s->picture_in_gop_number = 0;
 225 }
 226
 227 /* init video encoder */
 228 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
 229 {
 230     MpegEncContext *s = avctx->priv_data;
 231     int i, ret;
 232
 233     MPV_encode_defaults(s);
 234
 235     switch (avctx->codec_id) {
 236     case AV_CODEC_ID_MPEG2VIDEO:
 237         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 238             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 239             av_log(avctx, AV_LOG_ERROR,
 240                    "only YUV420 and YUV422 are supported\n");
 241             return -1;
 242         }
 243         break;
 244     case AV_CODEC_ID_MJPEG:
 245         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
 246             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
 247             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 248               avctx->pix_fmt != AV_PIX_FMT_YUV422P) ||
 249              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
 250             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 251             return -1;
 252         }
 253         break;
 254     default:
 255         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 256             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 257             return -1;
 258         }
 259     }
 260
 261     switch (avctx->pix_fmt) {
 262     case AV_PIX_FMT_YUVJ422P:
 263     case AV_PIX_FMT_YUV422P:
 264         s->chroma_format = CHROMA_422;
 265         break;
 266     case AV_PIX_FMT_YUVJ420P:
 267     case AV_PIX_FMT_YUV420P:
 268     default:
 269         s->chroma_format = CHROMA_420;
 270         break;
 271     }
 272
 273     s->bit_rate = avctx->bit_rate;
 274     s->width    = avctx->width;
 275     s->height   = avctx->height;
 276     if (avctx->gop_size > 600 &&
 277         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 278         av_log(avctx, AV_LOG_ERROR,
 279                "Warning keyframe interval too large! reducing it ...\n");
 280         avctx->gop_size = 600;
 281     }
 282     s->gop_size     = avctx->gop_size;
 283     s->avctx        = avctx;
 284     s->flags        = avctx->flags;
 285     s->flags2       = avctx->flags2;
 286     if (avctx->max_b_frames > MAX_B_FRAMES) {
 287         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 288                "is %d.\n", MAX_B_FRAMES);
 289     }
 290     s->max_b_frames = avctx->max_b_frames;
 291     s->codec_id     = avctx->codec->id;
 292     s->strict_std_compliance = avctx->strict_std_compliance;
 293     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 294     s->mpeg_quant         = avctx->mpeg_quant;
 295     s->rtp_mode           = !!avctx->rtp_payload_size;
 296     s->intra_dc_precision = avctx->intra_dc_precision;
 297     s->user_specified_pts = AV_NOPTS_VALUE;
 298
 299     if (s->gop_size <= 1) {
 300         s->intra_only = 1;
 301         s->gop_size   = 12;
 302     } else {
 303         s->intra_only = 0;
 304     }
 305
 306     s->me_method = avctx->me_method;
 307
 308     /* Fixed QSCALE */
 309     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 310
 311     s->adaptive_quant = (s->avctx->lumi_masking ||
 312                          s->avctx->dark_masking ||
 313                          s->avctx->temporal_cplx_masking ||
 314                          s->avctx->spatial_cplx_masking  ||
 315                          s->avctx->p_masking      ||
 316                          s->avctx->border_masking ||
 317                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 318                         !s->fixed_qscale;
 319
 320     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
 321
 322     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 323         av_log(avctx, AV_LOG_ERROR,
 324                "a vbv buffer size is needed, "
 325                "for encoding with a maximum bitrate\n");
 326         return -1;
 327     }
 328
 329     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 330         av_log(avctx, AV_LOG_INFO,
 331                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 332     }
 333
 334     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 335         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 336         return -1;
 337     }
 338
 339     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 340         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
 341         return -1;
 342     }
 343
 344     if (avctx->rc_max_rate &&
 345         avctx->rc_max_rate == avctx->bit_rate &&
 346         avctx->rc_max_rate != avctx->rc_min_rate) {
 347         av_log(avctx, AV_LOG_INFO,
 348                "impossible bitrate constraints, this will fail\n");
 349     }
 350
 351     if (avctx->rc_buffer_size &&
 352         avctx->bit_rate * (int64_t)avctx->time_base.num >
 353             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 354         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 355         return -1;
 356     }
 357
 358     if (!s->fixed_qscale &&
 359         avctx->bit_rate * av_q2d(avctx->time_base) >
 360             avctx->bit_rate_tolerance) {
 361         av_log(avctx, AV_LOG_ERROR,
 362                "bitrate tolerance too small for bitrate\n");
 363         return -1;
 364     }
 365
 366     if (s->avctx->rc_max_rate &&
 367         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 368         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 369          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 370         90000LL * (avctx->rc_buffer_size - 1) >
 371             s->avctx->rc_max_rate * 0xFFFFLL) {
 372         av_log(avctx, AV_LOG_INFO,
 373                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 374                "specified vbv buffer is too large for the given bitrate!\n");
 375     }
 376
 377     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
 378         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 379         s->codec_id != AV_CODEC_ID_FLV1) {
 380         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 381         return -1;
 382     }
 383
 384     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 385         av_log(avctx, AV_LOG_ERROR,
 386                "OBMC is only supported with simple mb decision\n");
 387         return -1;
 388     }
 389
 390     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 391         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 392         return -1;
 393     }
 394
 395     if (s->max_b_frames                    &&
 396         s->codec_id != AV_CODEC_ID_MPEG4      &&
 397         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 398         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 399         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 400         return -1;
 401     }
 402
 403     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 404          s->codec_id == AV_CODEC_ID_H263  ||
 405          s->codec_id == AV_CODEC_ID_H263P) &&
 406         (avctx->sample_aspect_ratio.num > 255 ||
 407          avctx->sample_aspect_ratio.den > 255)) {
 408         av_log(avctx, AV_LOG_ERROR,
 409                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
 410                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 411         return -1;
 412     }
 413
 414     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 415         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 416         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 417         return -1;
 418     }
 419
 420     // FIXME mpeg2 uses that too
 421     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
 422         av_log(avctx, AV_LOG_ERROR,
 423                "mpeg2 style quantization not supported by codec\n");
 424         return -1;
 425     }
 426
 427     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 428         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 429         return -1;
 430     }
 431
 432     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 433         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 434         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 435         return -1;
 436     }
 437
 438     if (s->avctx->scenechange_threshold < 1000000000 &&
 439         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
 440         av_log(avctx, AV_LOG_ERROR,
 441                "closed gop with scene change detection are not supported yet, "
 442                "set threshold to 1000000000\n");
 443         return -1;
 444     }
 445
 446     if (s->flags & CODEC_FLAG_LOW_DELAY) {
 447         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 448             av_log(avctx, AV_LOG_ERROR,
 449                   "low delay forcing is only available for mpeg2\n");
 450             return -1;
 451         }
 452         if (s->max_b_frames != 0) {
 453             av_log(avctx, AV_LOG_ERROR,
 454                    "b frames cannot be used with low delay\n");
 455             return -1;
 456         }
 457     }
 458
 459     if (s->q_scale_type == 1) {
 460         if (avctx->qmax > 12) {
 461             av_log(avctx, AV_LOG_ERROR,
 462                    "non linear quant only supports qmax <= 12 currently\n");
 463             return -1;
 464         }
 465     }
 466
 467     if (s->avctx->thread_count > 1         &&
 468         s->codec_id != AV_CODEC_ID_MPEG4      &&
 469         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 470         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 471         (s->codec_id != AV_CODEC_ID_H263P)) {
 472         av_log(avctx, AV_LOG_ERROR,
 473                "multi threaded encoding not supported by codec\n");
 474         return -1;
 475     }
 476
 477     if (s->avctx->thread_count < 1) {
 478         av_log(avctx, AV_LOG_ERROR,
 479                "automatic thread number detection not supported by codec,"
 480                "patch welcome\n");
 481         return -1;
 482     }
 483
 484     if (s->avctx->thread_count > 1)
 485         s->rtp_mode = 1;
 486
 487     if (!avctx->time_base.den || !avctx->time_base.num) {
 488         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 489         return -1;
 490     }
 491
 492     i = (INT_MAX / 2 + 128) >> 8;
 493     if (avctx->mb_threshold >= i) {
 494         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
 495                i - 1);
 496         return -1;
 497     }
 498
 499     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 500         av_log(avctx, AV_LOG_INFO,
 501                "notice: b_frame_strategy only affects the first pass\n");
 502         avctx->b_frame_strategy = 0;
 503     }
 504
 505     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 506     if (i > 1) {
 507         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 508         avctx->time_base.den /= i;
 509         avctx->time_base.num /= i;
 510         //return -1;
 511     }
 512
 513     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 514         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
 515         // (a + x * 3 / 8) / x
 516         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 517         s->inter_quant_bias = 0;
 518     } else {
 519         s->intra_quant_bias = 0;
 520         // (a - x / 4) / x
 521         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 522     }
 523
 524     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 525         s->intra_quant_bias = avctx->intra_quant_bias;
 526     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 527         s->inter_quant_bias = avctx->inter_quant_bias;
 528
 529     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 530         s->avctx->time_base.den > (1 << 16) - 1) {
 531         av_log(avctx, AV_LOG_ERROR,
 532                "timebase %d/%d not supported by MPEG 4 standard, "
 533                "the maximum admitted value for the timebase denominator "
 534                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 535                (1 << 16) - 1);
 536         return -1;
 537     }
 538     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 539
 540     switch (avctx->codec->id) {
 541     case AV_CODEC_ID_MPEG1VIDEO:
 542         s->out_format = FMT_MPEG1;
 543         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 544         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 545         break;
 546     case AV_CODEC_ID_MPEG2VIDEO:
 547         s->out_format = FMT_MPEG1;
 548         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 549         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 550         s->rtp_mode   = 1;
 551         break;
 552     case AV_CODEC_ID_MJPEG:
 553         s->out_format = FMT_MJPEG;
 554         s->intra_only = 1; /* force intra only for jpeg */
 555         if (!CONFIG_MJPEG_ENCODER ||
 556             ff_mjpeg_encode_init(s) < 0)
 557             return -1;
 558         avctx->delay = 0;
 559         s->low_delay = 1;
 560         break;
 561     case AV_CODEC_ID_H261:
 562         if (!CONFIG_H261_ENCODER)
 563             return -1;
 564         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 565             av_log(avctx, AV_LOG_ERROR,
 566                    "The specified picture size of %dx%d is not valid for the "
 567                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 568                     s->width, s->height);
 569             return -1;
 570         }
 571         s->out_format = FMT_H261;
 572         avctx->delay  = 0;
 573         s->low_delay  = 1;
 574         break;
 575     case AV_CODEC_ID_H263:
 576         if (!CONFIG_H263_ENCODER)
 577         return -1;
 578         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 579                              s->width, s->height) == 8) {
 580             av_log(avctx, AV_LOG_INFO,
 581                    "The specified picture size of %dx%d is not valid for "
 582                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 583                    "352x288, 704x576, and 1408x1152."
 584                    "Try H.263+.\n", s->width, s->height);
 585             return -1;
 586         }
 587         s->out_format = FMT_H263;
 588         avctx->delay  = 0;
 589         s->low_delay  = 1;
 590         break;
 591     case AV_CODEC_ID_H263P:
 592         s->out_format = FMT_H263;
 593         s->h263_plus  = 1;
 594         /* Fx */
 595         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 596         s->modified_quant  = s->h263_aic;
 597         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 598         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 599
 600         /* /Fx */
 601         /* These are just to be sure */
 602         avctx->delay = 0;
 603         s->low_delay = 1;
 604         break;
 605     case AV_CODEC_ID_FLV1:
 606         s->out_format      = FMT_H263;
 607         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 608         s->unrestricted_mv = 1;
 609         s->rtp_mode  = 0; /* don't allow GOB */
 610         avctx->delay = 0;
 611         s->low_delay = 1;
 612         break;
 613     case AV_CODEC_ID_RV10:
 614         s->out_format = FMT_H263;
 615         avctx->delay  = 0;
 616         s->low_delay  = 1;
 617         break;
 618     case AV_CODEC_ID_RV20:
 619         s->out_format      = FMT_H263;
 620         avctx->delay       = 0;
 621         s->low_delay       = 1;
 622         s->modified_quant  = 1;
 623         s->h263_aic        = 1;
 624         s->h263_plus       = 1;
 625         s->loop_filter     = 1;
 626         s->unrestricted_mv = 0;
 627         break;
 628     case AV_CODEC_ID_MPEG4:
 629         s->out_format      = FMT_H263;
 630         s->h263_pred       = 1;
 631         s->unrestricted_mv = 1;
 632         s->low_delay       = s->max_b_frames ? 0 : 1;
 633         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 634         break;
 635     case AV_CODEC_ID_MSMPEG4V2:
 636         s->out_format      = FMT_H263;
 637         s->h263_pred       = 1;
 638         s->unrestricted_mv = 1;
 639         s->msmpeg4_version = 2;
 640         avctx->delay       = 0;
 641         s->low_delay       = 1;
 642         break;
 643     case AV_CODEC_ID_MSMPEG4V3:
 644         s->out_format        = FMT_H263;
 645         s->h263_pred         = 1;
 646         s->unrestricted_mv   = 1;
 647         s->msmpeg4_version   = 3;
 648         s->flipflop_rounding = 1;
 649         avctx->delay         = 0;
 650         s->low_delay         = 1;
 651         break;
 652     case AV_CODEC_ID_WMV1:
 653         s->out_format        = FMT_H263;
 654         s->h263_pred         = 1;
 655         s->unrestricted_mv   = 1;
 656         s->msmpeg4_version   = 4;
 657         s->flipflop_rounding = 1;
 658         avctx->delay         = 0;
 659         s->low_delay         = 1;
 660         break;
 661     case AV_CODEC_ID_WMV2:
 662         s->out_format        = FMT_H263;
 663         s->h263_pred         = 1;
 664         s->unrestricted_mv   = 1;
 665         s->msmpeg4_version   = 5;
 666         s->flipflop_rounding = 1;
 667         avctx->delay         = 0;
 668         s->low_delay         = 1;
 669         break;
 670     default:
 671         return -1;
 672     }
 673
 674     avctx->has_b_frames = !s->low_delay;
 675
 676     s->encoding = 1;
 677
 678     s->progressive_frame    =
 679     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 680                                                 CODEC_FLAG_INTERLACED_ME) ||
 681                                 s->alternate_scan);
 682
 683     /* init */
 684     if (ff_MPV_common_init(s) < 0)
 685         return -1;
 686
 687     if (ARCH_X86)
 688         ff_MPV_encode_init_x86(s);
 689
 690     s->avctx->coded_frame = s->current_picture.f;
 691
 692     if (s->msmpeg4_version) {
 693         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 694                           2 * 2 * (MAX_LEVEL + 1) *
 695                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 696     }
 697     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 698
 699     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 700     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 701     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 702     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 703     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 704                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 705     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 706                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 707
 708     if (s->avctx->noise_reduction) {
 709         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 710                           2 * 64 * sizeof(uint16_t), fail);
 711     }
 712
 713     if (CONFIG_H263_ENCODER)
 714         ff_h263dsp_init(&s->h263dsp);
 715     if (!s->dct_quantize)
 716         s->dct_quantize = ff_dct_quantize_c;
 717     if (!s->denoise_dct)
 718         s->denoise_dct  = denoise_dct_c;
 719     s->fast_dct_quantize = s->dct_quantize;
 720     if (avctx->trellis)
 721         s->dct_quantize  = dct_quantize_trellis_c;
 722
 723     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 724         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 725
 726     s->quant_precision = 5;
 727
 728     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
 729     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
 730
 731     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 732         ff_h261_encode_init(s);
 733     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 734         ff_h263_encode_init(s);
 735     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 736         ff_msmpeg4_encode_init(s);
 737     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 738         && s->out_format == FMT_MPEG1)
 739         ff_mpeg1_encode_init(s);
 740
 741     /* init q matrix */
 742     for (i = 0; i < 64; i++) {
 743         int j = s->dsp.idct_permutation[i];
 744         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 745             s->mpeg_quant) {
 746             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 747             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 748         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 749             s->intra_matrix[j] =
 750             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 751         } else {
 752             /* mpeg1/2 */
 753             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 754             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 755         }
 756         if (s->avctx->intra_matrix)
 757             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 758         if (s->avctx->inter_matrix)
 759             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 760     }
 761
 762     /* precompute matrix */
 763     /* for mjpeg, we do include qscale in the matrix */
 764     if (s->out_format != FMT_MJPEG) {
 765         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
 766                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 767                           31, 1);
 768         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
 769                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 770                           31, 0);
 771     }
 772
 773     if (ff_rate_control_init(s) < 0)
 774         return -1;
 775
 776 #if FF_API_ERROR_RATE
 777     FF_DISABLE_DEPRECATION_WARNINGS
 778     if (avctx->error_rate)
 779         s->error_rate = avctx->error_rate;
 780     FF_ENABLE_DEPRECATION_WARNINGS;
 781 #endif
 782
 783 #if FF_API_NORMALIZE_AQP
 784     FF_DISABLE_DEPRECATION_WARNINGS
 785     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 786         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 787     FF_ENABLE_DEPRECATION_WARNINGS;
 788 #endif
 789
 790 #if FF_API_MV0
 791     FF_DISABLE_DEPRECATION_WARNINGS
 792     if (avctx->flags & CODEC_FLAG_MV0)
 793         s->mpv_flags |= FF_MPV_FLAG_MV0;
 794     FF_ENABLE_DEPRECATION_WARNINGS
 795 #endif
 796
 797     if (avctx->b_frame_strategy == 2) {
 798         for (i = 0; i < s->max_b_frames + 2; i++) {
 799             s->tmp_frames[i] = av_frame_alloc();
 800             if (!s->tmp_frames[i])
 801                 return AVERROR(ENOMEM);
 802
 803             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 804             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
 805             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
 806
 807             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 808             if (ret < 0)
 809                 return ret;
 810         }
 811     }
 812
 813     return 0;
 814 fail:
 815     ff_MPV_encode_end(avctx);
 816     return AVERROR_UNKNOWN;
 817 }
 818
 819 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
 820 {
 821     MpegEncContext *s = avctx->priv_data;
 822     int i;
 823
 824     ff_rate_control_uninit(s);
 825
 826     ff_MPV_common_end(s);
 827     if (CONFIG_MJPEG_ENCODER &&
 828         s->out_format == FMT_MJPEG)
 829         ff_mjpeg_encode_close(s);
 830
 831     av_freep(&avctx->extradata);
 832
 833     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
 834         av_frame_free(&s->tmp_frames[i]);
 835
 836     ff_free_picture_tables(&s->new_picture);
 837     ff_mpeg_unref_picture(s, &s->new_picture);
 838
 839     av_freep(&s->avctx->stats_out);
 840     av_freep(&s->ac_stats);
 841
 842     av_freep(&s->q_intra_matrix);
 843     av_freep(&s->q_inter_matrix);
 844     av_freep(&s->q_intra_matrix16);
 845     av_freep(&s->q_inter_matrix16);
 846     av_freep(&s->input_picture);
 847     av_freep(&s->reordered_input_picture);
 848     av_freep(&s->dct_offset);
 849
 850     return 0;
 851 }
 852
 853 static int get_sae(uint8_t *src, int ref, int stride)
 854 {
 855     int x,y;
 856     int acc = 0;
 857
 858     for (y = 0; y < 16; y++) {
 859         for (x = 0; x < 16; x++) {
 860             acc += FFABS(src[x + y * stride] - ref);
 861         }
 862     }
 863
 864     return acc;
 865 }
 866
 867 static int get_intra_count(MpegEncContext *s, uint8_t *src,
 868                            uint8_t *ref, int stride)
 869 {
 870     int x, y, w, h;
 871     int acc = 0;
 872
 873     w = s->width  & ~15;
 874     h = s->height & ~15;
 875
 876     for (y = 0; y < h; y += 16) {
 877         for (x = 0; x < w; x += 16) {
 878             int offset = x + y * stride;
 879             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
 880                                      16);
 881             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
 882             int sae  = get_sae(src + offset, mean, stride);
 883
 884             acc += sae + 500 < sad;
 885         }
 886     }
 887     return acc;
 888 }
 889
 890
 891 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
 892 {
 893     Picture *pic = NULL;
 894     int64_t pts;
 895     int i, display_picture_number = 0, ret;
 896     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
 897                                                  (s->low_delay ? 0 : 1);
 898     int direct = 1;
 899
 900     if (pic_arg) {
 901         pts = pic_arg->pts;
 902         display_picture_number = s->input_picture_number++;
 903
 904         if (pts != AV_NOPTS_VALUE) {
 905             if (s->user_specified_pts != AV_NOPTS_VALUE) {
 906                 int64_t time = pts;
 907                 int64_t last = s->user_specified_pts;
 908
 909                 if (time <= last) {
 910                     av_log(s->avctx, AV_LOG_ERROR,
 911                            "Error, Invalid timestamp=%"PRId64", "
 912                            "last=%"PRId64"\n", pts, s->user_specified_pts);
 913                     return -1;
 914                 }
 915
 916                 if (!s->low_delay && display_picture_number == 1)
 917                     s->dts_delta = time - last;
 918             }
 919             s->user_specified_pts = pts;
 920         } else {
 921             if (s->user_specified_pts != AV_NOPTS_VALUE) {
 922                 s->user_specified_pts =
 923                 pts = s->user_specified_pts + 1;
 924                 av_log(s->avctx, AV_LOG_INFO,
 925                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
 926                        pts);
 927             } else {
 928                 pts = display_picture_number;
 929             }
 930         }
 931     }
 932
 933     if (pic_arg) {
 934         if (!pic_arg->buf[0]);
 935             direct = 0;
 936         if (pic_arg->linesize[0] != s->linesize)
 937             direct = 0;
 938         if (pic_arg->linesize[1] != s->uvlinesize)
 939             direct = 0;
 940         if (pic_arg->linesize[2] != s->uvlinesize)
 941             direct = 0;
 942
 943         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
 944                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
 945
 946         if (direct) {
 947             i = ff_find_unused_picture(s, 1);
 948             if (i < 0)
 949                 return i;
 950
 951             pic = &s->picture[i];
 952             pic->reference = 3;
 953
 954             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
 955                 return ret;
 956             if (ff_alloc_picture(s, pic, 1) < 0) {
 957                 return -1;
 958             }
 959         } else {
 960             i = ff_find_unused_picture(s, 0);
 961             if (i < 0)
 962                 return i;
 963
 964             pic = &s->picture[i];
 965             pic->reference = 3;
 966
 967             if (ff_alloc_picture(s, pic, 0) < 0) {
 968                 return -1;
 969             }
 970
 971             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
 972                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
 973                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
 974                 // empty
 975             } else {
 976                 int h_chroma_shift, v_chroma_shift;
 977                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
 978                                                  &h_chroma_shift,
 979                                                  &v_chroma_shift);
 980
 981                 for (i = 0; i < 3; i++) {
 982                     int src_stride = pic_arg->linesize[i];
 983                     int dst_stride = i ? s->uvlinesize : s->linesize;
 984                     int h_shift = i ? h_chroma_shift : 0;
 985                     int v_shift = i ? v_chroma_shift : 0;
 986                     int w = s->width  >> h_shift;
 987                     int h = s->height >> v_shift;
 988                     uint8_t *src = pic_arg->data[i];
 989                     uint8_t *dst = pic->f->data[i];
 990
 991                     if (!s->avctx->rc_buffer_size)
 992                         dst += INPLACE_OFFSET;
 993
 994                     if (src_stride == dst_stride)
 995                         memcpy(dst, src, src_stride * h);
 996                     else {
 997                         while (h--) {
 998                             memcpy(dst, src, w);
 999                             dst += dst_stride;
1000                             src += src_stride;
1001                         }
1002                     }
1003                 }
1004             }
1005         }
1006         ret = av_frame_copy_props(pic->f, pic_arg);
1007         if (ret < 0)
1008             return ret;
1009
1010         pic->f->display_picture_number = display_picture_number;
1011         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1012     }
1013
1014     /* shift buffer entries */
1015     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1016         s->input_picture[i - 1] = s->input_picture[i];
1017
1018     s->input_picture[encoding_delay] = (Picture*) pic;
1019
1020     return 0;
1021 }
1022
1023 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1024 {
1025     int x, y, plane;
1026     int score = 0;
1027     int64_t score64 = 0;
1028
1029     for (plane = 0; plane < 3; plane++) {
1030         const int stride = p->f->linesize[plane];
1031         const int bw = plane ? 1 : 2;
1032         for (y = 0; y < s->mb_height * bw; y++) {
1033             for (x = 0; x < s->mb_width * bw; x++) {
1034                 int off = p->shared ? 0 : 16;
1035                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1036                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1037                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1038
1039                 switch (s->avctx->frame_skip_exp) {
1040                 case 0: score    =  FFMAX(score, v);          break;
1041                 case 1: score   += FFABS(v);                  break;
1042                 case 2: score   += v * v;                     break;
1043                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1044                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1045                 }
1046             }
1047         }
1048     }
1049
1050     if (score)
1051         score64 = score;
1052
1053     if (score64 < s->avctx->frame_skip_threshold)
1054         return 1;
1055     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1056         return 1;
1057     return 0;
1058 }
1059
1060 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1061 {
1062     AVPacket pkt = { 0 };
1063     int ret, got_output;
1064
1065     av_init_packet(&pkt);
1066     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1067     if (ret < 0)
1068         return ret;
1069
1070     ret = pkt.size;
1071     av_free_packet(&pkt);
1072     return ret;
1073 }
1074
1075 static int estimate_best_b_count(MpegEncContext *s)
1076 {
1077     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1078     AVCodecContext *c = avcodec_alloc_context3(NULL);
1079     const int scale = s->avctx->brd_scale;
1080     int i, j, out_size, p_lambda, b_lambda, lambda2;
1081     int64_t best_rd  = INT64_MAX;
1082     int best_b_count = -1;
1083
1084     assert(scale >= 0 && scale <= 3);
1085
1086     //emms_c();
1087     //s->next_picture_ptr->quality;
1088     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1089     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1090     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1091     if (!b_lambda) // FIXME we should do this somewhere else
1092         b_lambda = p_lambda;
1093     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1094                FF_LAMBDA_SHIFT;
1095
1096     c->width        = s->width  >> scale;
1097     c->height       = s->height >> scale;
1098     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1099     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1100     c->mb_decision  = s->avctx->mb_decision;
1101     c->me_cmp       = s->avctx->me_cmp;
1102     c->mb_cmp       = s->avctx->mb_cmp;
1103     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1104     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1105     c->time_base    = s->avctx->time_base;
1106     c->max_b_frames = s->max_b_frames;
1107
1108     if (avcodec_open2(c, codec, NULL) < 0)
1109         return -1;
1110
1111     for (i = 0; i < s->max_b_frames + 2; i++) {
1112         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1113                                                 s->next_picture_ptr;
1114
1115         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1116             pre_input = *pre_input_ptr;
1117
1118             if (!pre_input.shared && i) {
1119                 pre_input.f->data[0] += INPLACE_OFFSET;
1120                 pre_input.f->data[1] += INPLACE_OFFSET;
1121                 pre_input.f->data[2] += INPLACE_OFFSET;
1122             }
1123
1124             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1125                                  pre_input.f->data[0], pre_input.f->linesize[0],
1126                                  c->width,      c->height);
1127             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1128                                  pre_input.f->data[1], pre_input.f->linesize[1],
1129                                  c->width >> 1, c->height >> 1);
1130             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1131                                  pre_input.f->data[2], pre_input.f->linesize[2],
1132                                  c->width >> 1, c->height >> 1);
1133         }
1134     }
1135
1136     for (j = 0; j < s->max_b_frames + 1; j++) {
1137         int64_t rd = 0;
1138
1139         if (!s->input_picture[j])
1140             break;
1141
1142         c->error[0] = c->error[1] = c->error[2] = 0;
1143
1144         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1145         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1146
1147         out_size = encode_frame(c, s->tmp_frames[0]);
1148
1149         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1150
1151         for (i = 0; i < s->max_b_frames + 1; i++) {
1152             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1153
1154             s->tmp_frames[i + 1]->pict_type = is_p ?
1155                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1156             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1157
1158             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1159
1160             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1161         }
1162
1163         /* get the delayed frames */
1164         while (out_size) {
1165             out_size = encode_frame(c, NULL);
1166             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1167         }
1168
1169         rd += c->error[0] + c->error[1] + c->error[2];
1170
1171         if (rd < best_rd) {
1172             best_rd = rd;
1173             best_b_count = j;
1174         }
1175     }
1176
1177     avcodec_close(c);
1178     av_freep(&c);
1179
1180     return best_b_count;
1181 }
1182
1183 static int select_input_picture(MpegEncContext *s)
1184 {
1185     int i, ret;
1186
1187     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1188         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1189     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1190
1191     /* set next picture type & ordering */
1192     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1193         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1194             s->next_picture_ptr == NULL || s->intra_only) {
1195             s->reordered_input_picture[0] = s->input_picture[0];
1196             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1197             s->reordered_input_picture[0]->f->coded_picture_number =
1198                 s->coded_picture_number++;
1199         } else {
1200             int b_frames;
1201
1202             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1203                 if (s->picture_in_gop_number < s->gop_size &&
1204                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1205                     // FIXME check that te gop check above is +-1 correct
1206                     av_frame_unref(s->input_picture[0]->f);
1207
1208                     emms_c();
1209                     ff_vbv_update(s, 0);
1210
1211                     goto no_output_pic;
1212                 }
1213             }
1214
1215             if (s->flags & CODEC_FLAG_PASS2) {
1216                 for (i = 0; i < s->max_b_frames + 1; i++) {
1217                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1218
1219                     if (pict_num >= s->rc_context.num_entries)
1220                         break;
1221                     if (!s->input_picture[i]) {
1222                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1223                         break;
1224                     }
1225
1226                     s->input_picture[i]->f->pict_type =
1227                         s->rc_context.entry[pict_num].new_pict_type;
1228                 }
1229             }
1230
1231             if (s->avctx->b_frame_strategy == 0) {
1232                 b_frames = s->max_b_frames;
1233                 while (b_frames && !s->input_picture[b_frames])
1234                     b_frames--;
1235             } else if (s->avctx->b_frame_strategy == 1) {
1236                 for (i = 1; i < s->max_b_frames + 1; i++) {
1237                     if (s->input_picture[i] &&
1238                         s->input_picture[i]->b_frame_score == 0) {
1239                         s->input_picture[i]->b_frame_score =
1240                             get_intra_count(s,
1241                                             s->input_picture[i    ]->f->data[0],
1242                                             s->input_picture[i - 1]->f->data[0],
1243                                             s->linesize) + 1;
1244                     }
1245                 }
1246                 for (i = 0; i < s->max_b_frames + 1; i++) {
1247                     if (s->input_picture[i] == NULL ||
1248                         s->input_picture[i]->b_frame_score - 1 >
1249                             s->mb_num / s->avctx->b_sensitivity)
1250                         break;
1251                 }
1252
1253                 b_frames = FFMAX(0, i - 1);
1254
1255                 /* reset scores */
1256                 for (i = 0; i < b_frames + 1; i++) {
1257                     s->input_picture[i]->b_frame_score = 0;
1258                 }
1259             } else if (s->avctx->b_frame_strategy == 2) {
1260                 b_frames = estimate_best_b_count(s);
1261             } else {
1262                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1263                 b_frames = 0;
1264             }
1265
1266             emms_c();
1267
1268             for (i = b_frames - 1; i >= 0; i--) {
1269                 int type = s->input_picture[i]->f->pict_type;
1270                 if (type && type != AV_PICTURE_TYPE_B)
1271                     b_frames = i;
1272             }
1273             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1274                 b_frames == s->max_b_frames) {
1275                 av_log(s->avctx, AV_LOG_ERROR,
1276                        "warning, too many b frames in a row\n");
1277             }
1278
1279             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1280                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1281                     s->gop_size > s->picture_in_gop_number) {
1282                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1283                 } else {
1284                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1285                         b_frames = 0;
1286                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1287                 }
1288             }
1289
1290             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1291                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1292                 b_frames--;
1293
1294             s->reordered_input_picture[0] = s->input_picture[b_frames];
1295             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1296                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1297             s->reordered_input_picture[0]->f->coded_picture_number =
1298                 s->coded_picture_number++;
1299             for (i = 0; i < b_frames; i++) {
1300                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1301                 s->reordered_input_picture[i + 1]->f->pict_type =
1302                     AV_PICTURE_TYPE_B;
1303                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1304                     s->coded_picture_number++;
1305             }
1306         }
1307     }
1308 no_output_pic:
1309     if (s->reordered_input_picture[0]) {
1310         s->reordered_input_picture[0]->reference =
1311            s->reordered_input_picture[0]->f->pict_type !=
1312                AV_PICTURE_TYPE_B ? 3 : 0;
1313
1314         ff_mpeg_unref_picture(s, &s->new_picture);
1315         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1316             return ret;
1317
1318         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1319             // input is a shared pix, so we can't modifiy it -> alloc a new
1320             // one & ensure that the shared one is reuseable
1321
1322             Picture *pic;
1323             int i = ff_find_unused_picture(s, 0);
1324             if (i < 0)
1325                 return i;
1326             pic = &s->picture[i];
1327
1328             pic->reference = s->reordered_input_picture[0]->reference;
1329             if (ff_alloc_picture(s, pic, 0) < 0) {
1330                 return -1;
1331             }
1332
1333             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1334             if (ret < 0)
1335                 return ret;
1336
1337             /* mark us unused / free shared pic */
1338             av_frame_unref(s->reordered_input_picture[0]->f);
1339             s->reordered_input_picture[0]->shared = 0;
1340
1341             s->current_picture_ptr = pic;
1342         } else {
1343             // input is not a shared pix -> reuse buffer for current_pix
1344             s->current_picture_ptr = s->reordered_input_picture[0];
1345             for (i = 0; i < 4; i++) {
1346                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1347             }
1348         }
1349         ff_mpeg_unref_picture(s, &s->current_picture);
1350         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1351                                        s->current_picture_ptr)) < 0)
1352             return ret;
1353
1354         s->picture_number = s->new_picture.f->display_picture_number;
1355     } else {
1356         ff_mpeg_unref_picture(s, &s->new_picture);
1357     }
1358     return 0;
1359 }
1360
1361 static void frame_end(MpegEncContext *s)
1362 {
1363     int i;
1364
1365     if (s->unrestricted_mv &&
1366         s->current_picture.reference &&
1367         !s->intra_only) {
1368         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1369         int hshift = desc->log2_chroma_w;
1370         int vshift = desc->log2_chroma_h;
1371         s->dsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1372                           s->h_edge_pos, s->v_edge_pos,
1373                           EDGE_WIDTH, EDGE_WIDTH,
1374                           EDGE_TOP | EDGE_BOTTOM);
1375         s->dsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1376                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1377                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1378                           EDGE_TOP | EDGE_BOTTOM);
1379         s->dsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1380                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1381                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1382                           EDGE_TOP | EDGE_BOTTOM);
1383     }
1384
1385     emms_c();
1386
1387     s->last_pict_type                 = s->pict_type;
1388     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1389     if (s->pict_type!= AV_PICTURE_TYPE_B)
1390         s->last_non_b_pict_type = s->pict_type;
1391
1392     if (s->encoding) {
1393         /* release non-reference frames */
1394         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1395             if (!s->picture[i].reference)
1396                 ff_mpeg_unref_picture(s, &s->picture[i]);
1397         }
1398     }
1399
1400     s->avctx->coded_frame = s->current_picture_ptr->f;
1401
1402 }
1403
1404 static void update_noise_reduction(MpegEncContext *s)
1405 {
1406     int intra, i;
1407
1408     for (intra = 0; intra < 2; intra++) {
1409         if (s->dct_count[intra] > (1 << 16)) {
1410             for (i = 0; i < 64; i++) {
1411                 s->dct_error_sum[intra][i] >>= 1;
1412             }
1413             s->dct_count[intra] >>= 1;
1414         }
1415
1416         for (i = 0; i < 64; i++) {
1417             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1418                                        s->dct_count[intra] +
1419                                        s->dct_error_sum[intra][i] / 2) /
1420                                       (s->dct_error_sum[intra][i] + 1);
1421         }
1422     }
1423 }
1424
1425 static int frame_start(MpegEncContext *s)
1426 {
1427     int ret;
1428
1429     /* mark & release old frames */
1430     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1431         s->last_picture_ptr != s->next_picture_ptr &&
1432         s->last_picture_ptr->f->buf[0]) {
1433         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1434     }
1435
1436     s->current_picture_ptr->f->pict_type = s->pict_type;
1437     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1438
1439     ff_mpeg_unref_picture(s, &s->current_picture);
1440     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1441                                    s->current_picture_ptr)) < 0)
1442         return ret;
1443
1444     if (s->pict_type != AV_PICTURE_TYPE_B) {
1445         s->last_picture_ptr = s->next_picture_ptr;
1446         if (!s->droppable)
1447             s->next_picture_ptr = s->current_picture_ptr;
1448     }
1449
1450     if (s->last_picture_ptr) {
1451         ff_mpeg_unref_picture(s, &s->last_picture);
1452         if (s->last_picture_ptr->f->buf[0] &&
1453             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1454                                        s->last_picture_ptr)) < 0)
1455             return ret;
1456     }
1457     if (s->next_picture_ptr) {
1458         ff_mpeg_unref_picture(s, &s->next_picture);
1459         if (s->next_picture_ptr->f->buf[0] &&
1460             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1461                                        s->next_picture_ptr)) < 0)
1462             return ret;
1463     }
1464
1465     if (s->picture_structure!= PICT_FRAME) {
1466         int i;
1467         for (i = 0; i < 4; i++) {
1468             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1469                 s->current_picture.f->data[i] +=
1470                     s->current_picture.f->linesize[i];
1471             }
1472             s->current_picture.f->linesize[i] *= 2;
1473             s->last_picture.f->linesize[i]    *= 2;
1474             s->next_picture.f->linesize[i]    *= 2;
1475         }
1476     }
1477
1478     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1479         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1480         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1481     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1482         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1483         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1484     } else {
1485         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1486         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1487     }
1488
1489     if (s->dct_error_sum) {
1490         assert(s->avctx->noise_reduction && s->encoding);
1491         update_noise_reduction(s);
1492     }
1493
1494     return 0;
1495 }
1496
1497 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1498                           const AVFrame *pic_arg, int *got_packet)
1499 {
1500     MpegEncContext *s = avctx->priv_data;
1501     int i, stuffing_count, ret;
1502     int context_count = s->slice_context_count;
1503
1504     s->picture_in_gop_number++;
1505
1506     if (load_input_picture(s, pic_arg) < 0)
1507         return -1;
1508
1509     if (select_input_picture(s) < 0) {
1510         return -1;
1511     }
1512
1513     /* output? */
1514     if (s->new_picture.f->data[0]) {
1515         if (!pkt->data &&
1516             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1517             return ret;
1518         if (s->mb_info) {
1519             s->mb_info_ptr = av_packet_new_side_data(pkt,
1520                                  AV_PKT_DATA_H263_MB_INFO,
1521                                  s->mb_width*s->mb_height*12);
1522             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1523         }
1524
1525         for (i = 0; i < context_count; i++) {
1526             int start_y = s->thread_context[i]->start_mb_y;
1527             int   end_y = s->thread_context[i]->  end_mb_y;
1528             int h       = s->mb_height;
1529             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1530             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1531
1532             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1533         }
1534
1535         s->pict_type = s->new_picture.f->pict_type;
1536         //emms_c();
1537         ret = frame_start(s);
1538         if (ret < 0)
1539             return ret;
1540 vbv_retry:
1541         if (encode_picture(s, s->picture_number) < 0)
1542             return -1;
1543
1544         avctx->header_bits = s->header_bits;
1545         avctx->mv_bits     = s->mv_bits;
1546         avctx->misc_bits   = s->misc_bits;
1547         avctx->i_tex_bits  = s->i_tex_bits;
1548         avctx->p_tex_bits  = s->p_tex_bits;
1549         avctx->i_count     = s->i_count;
1550         // FIXME f/b_count in avctx
1551         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1552         avctx->skip_count  = s->skip_count;
1553
1554         frame_end(s);
1555
1556         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1557             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1558
1559         if (avctx->rc_buffer_size) {
1560             RateControlContext *rcc = &s->rc_context;
1561             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1562
1563             if (put_bits_count(&s->pb) > max_size &&
1564                 s->lambda < s->avctx->lmax) {
1565                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1566                                        (s->qscale + 1) / s->qscale);
1567                 if (s->adaptive_quant) {
1568                     int i;
1569                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1570                         s->lambda_table[i] =
1571                             FFMAX(s->lambda_table[i] + 1,
1572                                   s->lambda_table[i] * (s->qscale + 1) /
1573                                   s->qscale);
1574                 }
1575                 s->mb_skipped = 0;        // done in frame_start()
1576                 // done in encode_picture() so we must undo it
1577                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1578                     if (s->flipflop_rounding          ||
1579                         s->codec_id == AV_CODEC_ID_H263P ||
1580                         s->codec_id == AV_CODEC_ID_MPEG4)
1581                         s->no_rounding ^= 1;
1582                 }
1583                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1584                     s->time_base       = s->last_time_base;
1585                     s->last_non_b_time = s->time - s->pp_time;
1586                 }
1587                 for (i = 0; i < context_count; i++) {
1588                     PutBitContext *pb = &s->thread_context[i]->pb;
1589                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1590                 }
1591                 goto vbv_retry;
1592             }
1593
1594             assert(s->avctx->rc_max_rate);
1595         }
1596
1597         if (s->flags & CODEC_FLAG_PASS1)
1598             ff_write_pass1_stats(s);
1599
1600         for (i = 0; i < 4; i++) {
1601             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1602             avctx->error[i] += s->current_picture_ptr->f->error[i];
1603         }
1604
1605         if (s->flags & CODEC_FLAG_PASS1)
1606             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1607                    avctx->i_tex_bits + avctx->p_tex_bits ==
1608                        put_bits_count(&s->pb));
1609         flush_put_bits(&s->pb);
1610         s->frame_bits  = put_bits_count(&s->pb);
1611
1612         stuffing_count = ff_vbv_update(s, s->frame_bits);
1613         if (stuffing_count) {
1614             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1615                     stuffing_count + 50) {
1616                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1617                 return -1;
1618             }
1619
1620             switch (s->codec_id) {
1621             case AV_CODEC_ID_MPEG1VIDEO:
1622             case AV_CODEC_ID_MPEG2VIDEO:
1623                 while (stuffing_count--) {
1624                     put_bits(&s->pb, 8, 0);
1625                 }
1626             break;
1627             case AV_CODEC_ID_MPEG4:
1628                 put_bits(&s->pb, 16, 0);
1629                 put_bits(&s->pb, 16, 0x1C3);
1630                 stuffing_count -= 4;
1631                 while (stuffing_count--) {
1632                     put_bits(&s->pb, 8, 0xFF);
1633                 }
1634             break;
1635             default:
1636                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1637             }
1638             flush_put_bits(&s->pb);
1639             s->frame_bits  = put_bits_count(&s->pb);
1640         }
1641
1642         /* update mpeg1/2 vbv_delay for CBR */
1643         if (s->avctx->rc_max_rate                          &&
1644             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1645             s->out_format == FMT_MPEG1                     &&
1646             90000LL * (avctx->rc_buffer_size - 1) <=
1647                 s->avctx->rc_max_rate * 0xFFFFLL) {
1648             int vbv_delay, min_delay;
1649             double inbits  = s->avctx->rc_max_rate *
1650                              av_q2d(s->avctx->time_base);
1651             int    minbits = s->frame_bits - 8 *
1652                              (s->vbv_delay_ptr - s->pb.buf - 1);
1653             double bits    = s->rc_context.buffer_index + minbits - inbits;
1654
1655             if (bits < 0)
1656                 av_log(s->avctx, AV_LOG_ERROR,
1657                        "Internal error, negative bits\n");
1658
1659             assert(s->repeat_first_field == 0);
1660
1661             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1662             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1663                         s->avctx->rc_max_rate;
1664
1665             vbv_delay = FFMAX(vbv_delay, min_delay);
1666
1667             assert(vbv_delay < 0xFFFF);
1668
1669             s->vbv_delay_ptr[0] &= 0xF8;
1670             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1671             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1672             s->vbv_delay_ptr[2] &= 0x07;
1673             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1674             avctx->vbv_delay     = vbv_delay * 300;
1675         }
1676         s->total_bits     += s->frame_bits;
1677         avctx->frame_bits  = s->frame_bits;
1678
1679         pkt->pts = s->current_picture.f->pts;
1680         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1681             if (!s->current_picture.f->coded_picture_number)
1682                 pkt->dts = pkt->pts - s->dts_delta;
1683             else
1684                 pkt->dts = s->reordered_pts;
1685             s->reordered_pts = pkt->pts;
1686         } else
1687             pkt->dts = pkt->pts;
1688         if (s->current_picture.f->key_frame)
1689             pkt->flags |= AV_PKT_FLAG_KEY;
1690         if (s->mb_info)
1691             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1692     } else {
1693         s->frame_bits = 0;
1694     }
1695     assert((s->frame_bits & 7) == 0);
1696
1697     pkt->size = s->frame_bits / 8;
1698     *got_packet = !!pkt->size;
1699     return 0;
1700 }
1701
1702 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1703                                                 int n, int threshold)
1704 {
1705     static const char tab[64] = {
1706         3, 2, 2, 1, 1, 1, 1, 1,
1707         1, 1, 1, 1, 1, 1, 1, 1,
1708         1, 1, 1, 1, 1, 1, 1, 1,
1709         0, 0, 0, 0, 0, 0, 0, 0,
1710         0, 0, 0, 0, 0, 0, 0, 0,
1711         0, 0, 0, 0, 0, 0, 0, 0,
1712         0, 0, 0, 0, 0, 0, 0, 0,
1713         0, 0, 0, 0, 0, 0, 0, 0
1714     };
1715     int score = 0;
1716     int run = 0;
1717     int i;
1718     int16_t *block = s->block[n];
1719     const int last_index = s->block_last_index[n];
1720     int skip_dc;
1721
1722     if (threshold < 0) {
1723         skip_dc = 0;
1724         threshold = -threshold;
1725     } else
1726         skip_dc = 1;
1727
1728     /* Are all we could set to zero already zero? */
1729     if (last_index <= skip_dc - 1)
1730         return;
1731
1732     for (i = 0; i <= last_index; i++) {
1733         const int j = s->intra_scantable.permutated[i];
1734         const int level = FFABS(block[j]);
1735         if (level == 1) {
1736             if (skip_dc && i == 0)
1737                 continue;
1738             score += tab[run];
1739             run = 0;
1740         } else if (level > 1) {
1741             return;
1742         } else {
1743             run++;
1744         }
1745     }
1746     if (score >= threshold)
1747         return;
1748     for (i = skip_dc; i <= last_index; i++) {
1749         const int j = s->intra_scantable.permutated[i];
1750         block[j] = 0;
1751     }
1752     if (block[0])
1753         s->block_last_index[n] = 0;
1754     else
1755         s->block_last_index[n] = -1;
1756 }
1757
1758 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1759                                int last_index)
1760 {
1761     int i;
1762     const int maxlevel = s->max_qcoeff;
1763     const int minlevel = s->min_qcoeff;
1764     int overflow = 0;
1765
1766     if (s->mb_intra) {
1767         i = 1; // skip clipping of intra dc
1768     } else
1769         i = 0;
1770
1771     for (; i <= last_index; i++) {
1772         const int j = s->intra_scantable.permutated[i];
1773         int level = block[j];
1774
1775         if (level > maxlevel) {
1776             level = maxlevel;
1777             overflow++;
1778         } else if (level < minlevel) {
1779             level = minlevel;
1780             overflow++;
1781         }
1782
1783         block[j] = level;
1784     }
1785
1786     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1787         av_log(s->avctx, AV_LOG_INFO,
1788                "warning, clipping %d dct coefficients to %d..%d\n",
1789                overflow, minlevel, maxlevel);
1790 }
1791
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For every pixel the 3x3 neighbourhood (cropped at the block borders) is
 * examined; the stored weight is
 * 36 * ff_sqrt(count * sum_of_squares - sum * sum) / count.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two pixel rows
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            /* number of pixels inside the cropped window */
            const int count = (bottom - top) * (right - left);
            int sum = 0;
            int sqr = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = line[nx];

                    sum += v;
                    sqr += v * v;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
1815
1816 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1817                                                 int motion_x, int motion_y,
1818                                                 int mb_block_height,
1819                                                 int mb_block_count)
1820 {
1821     int16_t weight[8][64];
1822     int16_t orig[8][64];
1823     const int mb_x = s->mb_x;
1824     const int mb_y = s->mb_y;
1825     int i;
1826     int skip_dct[8];
1827     int dct_offset = s->linesize * 8; // default for progressive frames
1828     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1829     ptrdiff_t wrap_y, wrap_c;
1830
1831     for (i = 0; i < mb_block_count; i++)
1832         skip_dct[i] = s->skipdct;
1833
1834     if (s->adaptive_quant) {
1835         const int last_qp = s->qscale;
1836         const int mb_xy = mb_x + mb_y * s->mb_stride;
1837
1838         s->lambda = s->lambda_table[mb_xy];
1839         update_qscale(s);
1840
1841         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1842             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1843             s->dquant = s->qscale - last_qp;
1844
1845             if (s->out_format == FMT_H263) {
1846                 s->dquant = av_clip(s->dquant, -2, 2);
1847
1848                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1849                     if (!s->mb_intra) {
1850                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1851                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1852                                 s->dquant = 0;
1853                         }
1854                         if (s->mv_type == MV_TYPE_8X8)
1855                             s->dquant = 0;
1856                     }
1857                 }
1858             }
1859         }
1860         ff_set_qscale(s, last_qp + s->dquant);
1861     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1862         ff_set_qscale(s, s->qscale + s->dquant);
1863
1864     wrap_y = s->linesize;
1865     wrap_c = s->uvlinesize;
1866     ptr_y  = s->new_picture.f->data[0] +
1867              (mb_y * 16 * wrap_y)              + mb_x * 16;
1868     ptr_cb = s->new_picture.f->data[1] +
1869              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1870     ptr_cr = s->new_picture.f->data[2] +
1871              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1872
1873     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1874         uint8_t *ebuf = s->edge_emu_buffer + 32;
1875         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1876                                  wrap_y, wrap_y,
1877                                  16, 16, mb_x * 16, mb_y * 16,
1878                                  s->width, s->height);
1879         ptr_y = ebuf;
1880         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1881                                  wrap_c, wrap_c,
1882                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1883                                  s->width >> 1, s->height >> 1);
1884         ptr_cb = ebuf + 18 * wrap_y;
1885         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1886                                  wrap_c, wrap_c,
1887                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1888                                  s->width >> 1, s->height >> 1);
1889         ptr_cr = ebuf + 18 * wrap_y + 8;
1890     }
1891
1892     if (s->mb_intra) {
1893         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1894             int progressive_score, interlaced_score;
1895
1896             s->interlaced_dct = 0;
1897             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1898                                                     NULL, wrap_y, 8) +
1899                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1900                                                     NULL, wrap_y, 8) - 400;
1901
1902             if (progressive_score > 0) {
1903                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1904                                                        NULL, wrap_y * 2, 8) +
1905                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1906                                                        NULL, wrap_y * 2, 8);
1907                 if (progressive_score > interlaced_score) {
1908                     s->interlaced_dct = 1;
1909
1910                     dct_offset = wrap_y;
1911                     wrap_y <<= 1;
1912                     if (s->chroma_format == CHROMA_422)
1913                         wrap_c <<= 1;
1914                 }
1915             }
1916         }
1917
1918         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1919         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1920         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1921         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1922
1923         if (s->flags & CODEC_FLAG_GRAY) {
1924             skip_dct[4] = 1;
1925             skip_dct[5] = 1;
1926         } else {
1927             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1928             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1929             if (!s->chroma_y_shift) { /* 422 */
1930                 s->dsp.get_pixels(s->block[6],
1931                                   ptr_cb + (dct_offset >> 1), wrap_c);
1932                 s->dsp.get_pixels(s->block[7],
1933                                   ptr_cr + (dct_offset >> 1), wrap_c);
1934             }
1935         }
1936     } else {
1937         op_pixels_func (*op_pix)[4];
1938         qpel_mc_func (*op_qpix)[16];
1939         uint8_t *dest_y, *dest_cb, *dest_cr;
1940
1941         dest_y  = s->dest[0];
1942         dest_cb = s->dest[1];
1943         dest_cr = s->dest[2];
1944
1945         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1946             op_pix  = s->hdsp.put_pixels_tab;
1947             op_qpix = s->dsp.put_qpel_pixels_tab;
1948         } else {
1949             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1950             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1951         }
1952
1953         if (s->mv_dir & MV_DIR_FORWARD) {
1954             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1955                           s->last_picture.f->data,
1956                           op_pix, op_qpix);
1957             op_pix  = s->hdsp.avg_pixels_tab;
1958             op_qpix = s->dsp.avg_qpel_pixels_tab;
1959         }
1960         if (s->mv_dir & MV_DIR_BACKWARD) {
1961             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1962                           s->next_picture.f->data,
1963                           op_pix, op_qpix);
1964         }
1965
1966         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1967             int progressive_score, interlaced_score;
1968
1969             s->interlaced_dct = 0;
1970             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1971                                                     ptr_y,              wrap_y,
1972                                                     8) +
1973                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1974                                                     ptr_y + wrap_y * 8, wrap_y,
1975                                                     8) - 400;
1976
1977             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1978                 progressive_score -= 400;
1979
1980             if (progressive_score > 0) {
1981                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1982                                                        ptr_y,
1983                                                        wrap_y * 2, 8) +
1984                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1985                                                        ptr_y + wrap_y,
1986                                                        wrap_y * 2, 8);
1987
1988                 if (progressive_score > interlaced_score) {
1989                     s->interlaced_dct = 1;
1990
1991                     dct_offset = wrap_y;
1992                     wrap_y <<= 1;
1993                     if (s->chroma_format == CHROMA_422)
1994                         wrap_c <<= 1;
1995                 }
1996             }
1997         }
1998
1999         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2000         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2001         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2002                            dest_y + dct_offset, wrap_y);
2003         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2004                            dest_y + dct_offset + 8, wrap_y);
2005
2006         if (s->flags & CODEC_FLAG_GRAY) {
2007             skip_dct[4] = 1;
2008             skip_dct[5] = 1;
2009         } else {
2010             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2011             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2012             if (!s->chroma_y_shift) { /* 422 */
2013                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2014                                    dest_cb + (dct_offset >> 1), wrap_c);
2015                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2016                                    dest_cr + (dct_offset >> 1), wrap_c);
2017             }
2018         }
2019         /* pre quantization */
2020         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2021                 2 * s->qscale * s->qscale) {
2022             // FIXME optimize
2023             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2024                               wrap_y, 8) < 20 * s->qscale)
2025                 skip_dct[0] = 1;
2026             if (s->dsp.sad[1](NULL, ptr_y + 8,
2027                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2028                 skip_dct[1] = 1;
2029             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2030                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2031                 skip_dct[2] = 1;
2032             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2033                               dest_y + dct_offset + 8,
2034                               wrap_y, 8) < 20 * s->qscale)
2035                 skip_dct[3] = 1;
2036             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2037                               wrap_c, 8) < 20 * s->qscale)
2038                 skip_dct[4] = 1;
2039             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2040                               wrap_c, 8) < 20 * s->qscale)
2041                 skip_dct[5] = 1;
2042             if (!s->chroma_y_shift) { /* 422 */
2043                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2044                                   dest_cb + (dct_offset >> 1),
2045                                   wrap_c, 8) < 20 * s->qscale)
2046                     skip_dct[6] = 1;
2047                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2048                                   dest_cr + (dct_offset >> 1),
2049                                   wrap_c, 8) < 20 * s->qscale)
2050                     skip_dct[7] = 1;
2051             }
2052         }
2053     }
2054
2055     if (s->quantizer_noise_shaping) {
2056         if (!skip_dct[0])
2057             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2058         if (!skip_dct[1])
2059             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2060         if (!skip_dct[2])
2061             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2062         if (!skip_dct[3])
2063             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2064         if (!skip_dct[4])
2065             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2066         if (!skip_dct[5])
2067             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2068         if (!s->chroma_y_shift) { /* 422 */
2069             if (!skip_dct[6])
2070                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2071                                   wrap_c);
2072             if (!skip_dct[7])
2073                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2074                                   wrap_c);
2075         }
2076         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2077     }
2078
2079     /* DCT & quantize */
2080     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2081     {
2082         for (i = 0; i < mb_block_count; i++) {
2083             if (!skip_dct[i]) {
2084                 int overflow;
2085                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2086                 // FIXME we could decide to change to quantizer instead of
2087                 // clipping
2088                 // JS: I don't think that would be a good idea it could lower
2089                 //     quality instead of improve it. Just INTRADC clipping
2090                 //     deserves changes in quantizer
2091                 if (overflow)
2092                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2093             } else
2094                 s->block_last_index[i] = -1;
2095         }
2096         if (s->quantizer_noise_shaping) {
2097             for (i = 0; i < mb_block_count; i++) {
2098                 if (!skip_dct[i]) {
2099                     s->block_last_index[i] =
2100                         dct_quantize_refine(s, s->block[i], weight[i],
2101                                             orig[i], i, s->qscale);
2102                 }
2103             }
2104         }
2105
2106         if (s->luma_elim_threshold && !s->mb_intra)
2107             for (i = 0; i < 4; i++)
2108                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2109         if (s->chroma_elim_threshold && !s->mb_intra)
2110             for (i = 4; i < mb_block_count; i++)
2111                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2112
2113         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2114             for (i = 0; i < mb_block_count; i++) {
2115                 if (s->block_last_index[i] == -1)
2116                     s->coded_score[i] = INT_MAX / 256;
2117             }
2118         }
2119     }
2120
2121     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2122         s->block_last_index[4] =
2123         s->block_last_index[5] = 0;
2124         s->block[4][0] =
2125         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2126     }
2127
2128     // non c quantize code returns incorrect block_last_index FIXME
2129     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2130         for (i = 0; i < mb_block_count; i++) {
2131             int j;
2132             if (s->block_last_index[i] > 0) {
2133                 for (j = 63; j > 0; j--) {
2134                     if (s->block[i][s->intra_scantable.permutated[j]])
2135                         break;
2136                 }
2137                 s->block_last_index[i] = j;
2138             }
2139         }
2140     }
2141
2142     /* huffman encode */
2143     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2144     case AV_CODEC_ID_MPEG1VIDEO:
2145     case AV_CODEC_ID_MPEG2VIDEO:
2146         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2147             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2148         break;
2149     case AV_CODEC_ID_MPEG4:
2150         if (CONFIG_MPEG4_ENCODER)
2151             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2152         break;
2153     case AV_CODEC_ID_MSMPEG4V2:
2154     case AV_CODEC_ID_MSMPEG4V3:
2155     case AV_CODEC_ID_WMV1:
2156         if (CONFIG_MSMPEG4_ENCODER)
2157             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2158         break;
2159     case AV_CODEC_ID_WMV2:
2160         if (CONFIG_WMV2_ENCODER)
2161             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2162         break;
2163     case AV_CODEC_ID_H261:
2164         if (CONFIG_H261_ENCODER)
2165             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2166         break;
2167     case AV_CODEC_ID_H263:
2168     case AV_CODEC_ID_H263P:
2169     case AV_CODEC_ID_FLV1:
2170     case AV_CODEC_ID_RV10:
2171     case AV_CODEC_ID_RV20:
2172         if (CONFIG_H263_ENCODER)
2173             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2174         break;
2175     case AV_CODEC_ID_MJPEG:
2176         if (CONFIG_MJPEG_ENCODER)
2177             ff_mjpeg_encode_mb(s, s->block);
2178         break;
2179     default:
2180         assert(0);
2181     }
2182 }
2183
2184 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2185 {
2186     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2187     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2188 }
2189
2190 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2191     int i;
2192
2193     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2194
2195     /* mpeg1 */
2196     d->mb_skip_run= s->mb_skip_run;
2197     for(i=0; i<3; i++)
2198         d->last_dc[i] = s->last_dc[i];
2199
2200     /* statistics */
2201     d->mv_bits= s->mv_bits;
2202     d->i_tex_bits= s->i_tex_bits;
2203     d->p_tex_bits= s->p_tex_bits;
2204     d->i_count= s->i_count;
2205     d->f_count= s->f_count;
2206     d->b_count= s->b_count;
2207     d->skip_count= s->skip_count;
2208     d->misc_bits= s->misc_bits;
2209     d->last_bits= 0;
2210
2211     d->mb_skipped= 0;
2212     d->qscale= s->qscale;
2213     d->dquant= s->dquant;
2214
2215     d->esc3_level_length= s->esc3_level_length;
2216 }
2217
2218 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2219     int i;
2220
2221     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2222     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2223
2224     /* mpeg1 */
2225     d->mb_skip_run= s->mb_skip_run;
2226     for(i=0; i<3; i++)
2227         d->last_dc[i] = s->last_dc[i];
2228
2229     /* statistics */
2230     d->mv_bits= s->mv_bits;
2231     d->i_tex_bits= s->i_tex_bits;
2232     d->p_tex_bits= s->p_tex_bits;
2233     d->i_count= s->i_count;
2234     d->f_count= s->f_count;
2235     d->b_count= s->b_count;
2236     d->skip_count= s->skip_count;
2237     d->misc_bits= s->misc_bits;
2238
2239     d->mb_intra= s->mb_intra;
2240     d->mb_skipped= s->mb_skipped;
2241     d->mv_type= s->mv_type;
2242     d->mv_dir= s->mv_dir;
2243     d->pb= s->pb;
2244     if(s->data_partitioning){
2245         d->pb2= s->pb2;
2246         d->tex_pb= s->tex_pb;
2247     }
2248     d->block= s->block;
2249     for(i=0; i<8; i++)
2250         d->block_last_index[i]= s->block_last_index[i];
2251     d->interlaced_dct= s->interlaced_dct;
2252     d->qscale= s->qscale;
2253
2254     d->esc3_level_length= s->esc3_level_length;
2255 }
2256
2257 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2258                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2259                            int *dmin, int *next_block, int motion_x, int motion_y)
2260 {
2261     int score;
2262     uint8_t *dest_backup[3];
2263
2264     copy_context_before_encode(s, backup, type);
2265
2266     s->block= s->blocks[*next_block];
2267     s->pb= pb[*next_block];
2268     if(s->data_partitioning){
2269         s->pb2   = pb2   [*next_block];
2270         s->tex_pb= tex_pb[*next_block];
2271     }
2272
2273     if(*next_block){
2274         memcpy(dest_backup, s->dest, sizeof(s->dest));
2275         s->dest[0] = s->rd_scratchpad;
2276         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2277         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2278         assert(s->linesize >= 32); //FIXME
2279     }
2280
2281     encode_mb(s, motion_x, motion_y);
2282
2283     score= put_bits_count(&s->pb);
2284     if(s->data_partitioning){
2285         score+= put_bits_count(&s->pb2);
2286         score+= put_bits_count(&s->tex_pb);
2287     }
2288
2289     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2290         ff_MPV_decode_mb(s, s->block);
2291
2292         score *= s->lambda2;
2293         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2294     }
2295
2296     if(*next_block){
2297         memcpy(s->dest, dest_backup, sizeof(s->dest));
2298     }
2299
2300     if(score<*dmin){
2301         *dmin= score;
2302         *next_block^=1;
2303
2304         copy_context_after_encode(best, s, type);
2305     }
2306 }
2307
2308 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2309     uint32_t *sq = ff_square_tab + 256;
2310     int acc=0;
2311     int x,y;
2312
2313     if(w==16 && h==16)
2314         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2315     else if(w==8 && h==8)
2316         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2317
2318     for(y=0; y<h; y++){
2319         for(x=0; x<w; x++){
2320             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2321         }
2322     }
2323
2324     assert(acc>=0);
2325
2326     return acc;
2327 }
2328
2329 static int sse_mb(MpegEncContext *s){
2330     int w= 16;
2331     int h= 16;
2332
2333     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2334     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2335
2336     if(w==16 && h==16)
2337       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2338         return  s->dsp.nsse[0](s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2339                +s->dsp.nsse[1](s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2340                +s->dsp.nsse[1](s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2341       }else{
2342         return  s->dsp.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2343                +s->dsp.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2344                +s->dsp.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2345       }
2346     else
2347         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2348                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2349                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2350 }
2351
2352 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2353     MpegEncContext *s= *(void**)arg;
2354
2355
2356     s->me.pre_pass=1;
2357     s->me.dia_size= s->avctx->pre_dia_size;
2358     s->first_slice_line=1;
2359     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2360         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2361             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2362         }
2363         s->first_slice_line=0;
2364     }
2365
2366     s->me.pre_pass=0;
2367
2368     return 0;
2369 }
2370
2371 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2372     MpegEncContext *s= *(void**)arg;
2373
2374     s->me.dia_size= s->avctx->dia_size;
2375     s->first_slice_line=1;
2376     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2377         s->mb_x=0; //for block init below
2378         ff_init_block_index(s);
2379         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2380             s->block_index[0]+=2;
2381             s->block_index[1]+=2;
2382             s->block_index[2]+=2;
2383             s->block_index[3]+=2;
2384
2385             /* compute motion vector & mb_type and store in context */
2386             if(s->pict_type==AV_PICTURE_TYPE_B)
2387                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2388             else
2389                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2390         }
2391         s->first_slice_line=0;
2392     }
2393     return 0;
2394 }
2395
2396 static int mb_var_thread(AVCodecContext *c, void *arg){
2397     MpegEncContext *s= *(void**)arg;
2398     int mb_x, mb_y;
2399
2400     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2401         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2402             int xx = mb_x * 16;
2403             int yy = mb_y * 16;
2404             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2405             int varc;
2406             int sum = s->dsp.pix_sum(pix, s->linesize);
2407
2408             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2409
2410             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2411             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2412             s->me.mb_var_sum_temp    += varc;
2413         }
2414     }
2415     return 0;
2416 }
2417
2418 static void write_slice_end(MpegEncContext *s){
2419     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2420         if(s->partitioned_frame){
2421             ff_mpeg4_merge_partitions(s);
2422         }
2423
2424         ff_mpeg4_stuffing(&s->pb);
2425     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2426         ff_mjpeg_encode_stuffing(&s->pb);
2427     }
2428
2429     avpriv_align_put_bits(&s->pb);
2430     flush_put_bits(&s->pb);
2431
2432     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2433         s->misc_bits+= get_bits_diff(s);
2434 }
2435
2436 static void write_mb_info(MpegEncContext *s)
2437 {
2438     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2439     int offset = put_bits_count(&s->pb);
2440     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2441     int gobn = s->mb_y / s->gob_index;
2442     int pred_x, pred_y;
2443     if (CONFIG_H263_ENCODER)
2444         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2445     bytestream_put_le32(&ptr, offset);
2446     bytestream_put_byte(&ptr, s->qscale);
2447     bytestream_put_byte(&ptr, gobn);
2448     bytestream_put_le16(&ptr, mba);
2449     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2450     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2451     /* 4MV not implemented */
2452     bytestream_put_byte(&ptr, 0); /* hmv2 */
2453     bytestream_put_byte(&ptr, 0); /* vmv2 */
2454 }
2455
2456 static void update_mb_info(MpegEncContext *s, int startcode)
2457 {
2458     if (!s->mb_info)
2459         return;
2460     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2461         s->mb_info_size += 12;
2462         s->prev_mb_info = s->last_mb_info;
2463     }
2464     if (startcode) {
2465         s->prev_mb_info = put_bits_count(&s->pb)/8;
2466         /* This might have incremented mb_info_size above, and we return without
2467          * actually writing any info into that slot yet. But in that case,
2468          * this will be called again at the start of the after writing the
2469          * start code, actually writing the mb info. */
2470         return;
2471     }
2472
2473     s->last_mb_info = put_bits_count(&s->pb)/8;
2474     if (!s->mb_info_size)
2475         s->mb_info_size += 12;
2476     write_mb_info(s);
2477 }
2478
2479 static int encode_thread(AVCodecContext *c, void *arg){
2480     MpegEncContext *s= *(void**)arg;
2481     int mb_x, mb_y, pdif = 0;
2482     int chr_h= 16>>s->chroma_y_shift;
2483     int i, j;
2484     MpegEncContext best_s, backup_s;
2485     uint8_t bit_buf[2][MAX_MB_BYTES];
2486     uint8_t bit_buf2[2][MAX_MB_BYTES];
2487     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2488     PutBitContext pb[2], pb2[2], tex_pb[2];
2489
2490     for(i=0; i<2; i++){
2491         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2492         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2493         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2494     }
2495
2496     s->last_bits= put_bits_count(&s->pb);
2497     s->mv_bits=0;
2498     s->misc_bits=0;
2499     s->i_tex_bits=0;
2500     s->p_tex_bits=0;
2501     s->i_count=0;
2502     s->f_count=0;
2503     s->b_count=0;
2504     s->skip_count=0;
2505
2506     for(i=0; i<3; i++){
2507         /* init last dc values */
2508         /* note: quant matrix value (8) is implied here */
2509         s->last_dc[i] = 128 << s->intra_dc_precision;
2510
2511         s->current_picture.f->error[i] = 0;
2512     }
2513     s->mb_skip_run = 0;
2514     memset(s->last_mv, 0, sizeof(s->last_mv));
2515
2516     s->last_mv_dir = 0;
2517
2518     switch(s->codec_id){
2519     case AV_CODEC_ID_H263:
2520     case AV_CODEC_ID_H263P:
2521     case AV_CODEC_ID_FLV1:
2522         if (CONFIG_H263_ENCODER)
2523             s->gob_index = ff_h263_get_gob_height(s);
2524         break;
2525     case AV_CODEC_ID_MPEG4:
2526         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2527             ff_mpeg4_init_partitions(s);
2528         break;
2529     }
2530
2531     s->resync_mb_x=0;
2532     s->resync_mb_y=0;
2533     s->first_slice_line = 1;
2534     s->ptr_lastgob = s->pb.buf;
2535     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2536         s->mb_x=0;
2537         s->mb_y= mb_y;
2538
2539         ff_set_qscale(s, s->qscale);
2540         ff_init_block_index(s);
2541
2542         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2543             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2544             int mb_type= s->mb_type[xy];
2545 //            int d;
2546             int dmin= INT_MAX;
2547             int dir;
2548
2549             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2550                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2551                 return -1;
2552             }
2553             if(s->data_partitioning){
2554                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2555                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2556                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2557                     return -1;
2558                 }
2559             }
2560
2561             s->mb_x = mb_x;
2562             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2563             ff_update_block_index(s);
2564
2565             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2566                 ff_h261_reorder_mb_index(s);
2567                 xy= s->mb_y*s->mb_stride + s->mb_x;
2568                 mb_type= s->mb_type[xy];
2569             }
2570
2571             /* write gob / video packet header  */
2572             if(s->rtp_mode){
2573                 int current_packet_size, is_gob_start;
2574
2575                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2576
2577                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2578
2579                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2580
2581                 switch(s->codec_id){
2582                 case AV_CODEC_ID_H263:
2583                 case AV_CODEC_ID_H263P:
2584                     if(!s->h263_slice_structured)
2585                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2586                     break;
2587                 case AV_CODEC_ID_MPEG2VIDEO:
2588                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2589                 case AV_CODEC_ID_MPEG1VIDEO:
2590                     if(s->mb_skip_run) is_gob_start=0;
2591                     break;
2592                 }
2593
2594                 if(is_gob_start){
2595                     if(s->start_mb_y != mb_y || mb_x!=0){
2596                         write_slice_end(s);
2597
2598                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2599                             ff_mpeg4_init_partitions(s);
2600                         }
2601                     }
2602
2603                     assert((put_bits_count(&s->pb)&7) == 0);
2604                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2605
2606                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2607                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2608                         int d = 100 / s->error_rate;
2609                         if(r % d == 0){
2610                             current_packet_size=0;
2611                             s->pb.buf_ptr= s->ptr_lastgob;
2612                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2613                         }
2614                     }
2615
2616                     if (s->avctx->rtp_callback){
2617                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2618                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2619                     }
2620                     update_mb_info(s, 1);
2621
2622                     switch(s->codec_id){
2623                     case AV_CODEC_ID_MPEG4:
2624                         if (CONFIG_MPEG4_ENCODER) {
2625                             ff_mpeg4_encode_video_packet_header(s);
2626                             ff_mpeg4_clean_buffers(s);
2627                         }
2628                     break;
2629                     case AV_CODEC_ID_MPEG1VIDEO:
2630                     case AV_CODEC_ID_MPEG2VIDEO:
2631                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2632                             ff_mpeg1_encode_slice_header(s);
2633                             ff_mpeg1_clean_buffers(s);
2634                         }
2635                     break;
2636                     case AV_CODEC_ID_H263:
2637                     case AV_CODEC_ID_H263P:
2638                         if (CONFIG_H263_ENCODER)
2639                             ff_h263_encode_gob_header(s, mb_y);
2640                     break;
2641                     }
2642
2643                     if(s->flags&CODEC_FLAG_PASS1){
2644                         int bits= put_bits_count(&s->pb);
2645                         s->misc_bits+= bits - s->last_bits;
2646                         s->last_bits= bits;
2647                     }
2648
2649                     s->ptr_lastgob += current_packet_size;
2650                     s->first_slice_line=1;
2651                     s->resync_mb_x=mb_x;
2652                     s->resync_mb_y=mb_y;
2653                 }
2654             }
2655
2656             if(  (s->resync_mb_x   == s->mb_x)
2657                && s->resync_mb_y+1 == s->mb_y){
2658                 s->first_slice_line=0;
2659             }
2660
2661             s->mb_skipped=0;
2662             s->dquant=0; //only for QP_RD
2663
2664             update_mb_info(s, 0);
2665
2666             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2667                 int next_block=0;
2668                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2669
2670                 copy_context_before_encode(&backup_s, s, -1);
2671                 backup_s.pb= s->pb;
2672                 best_s.data_partitioning= s->data_partitioning;
2673                 best_s.partitioned_frame= s->partitioned_frame;
2674                 if(s->data_partitioning){
2675                     backup_s.pb2= s->pb2;
2676                     backup_s.tex_pb= s->tex_pb;
2677                 }
2678
2679                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2680                     s->mv_dir = MV_DIR_FORWARD;
2681                     s->mv_type = MV_TYPE_16X16;
2682                     s->mb_intra= 0;
2683                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2684                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2685                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2686                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2687                 }
2688                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2689                     s->mv_dir = MV_DIR_FORWARD;
2690                     s->mv_type = MV_TYPE_FIELD;
2691                     s->mb_intra= 0;
2692                     for(i=0; i<2; i++){
2693                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2694                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2695                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2696                     }
2697                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2698                                  &dmin, &next_block, 0, 0);
2699                 }
2700                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2701                     s->mv_dir = MV_DIR_FORWARD;
2702                     s->mv_type = MV_TYPE_16X16;
2703                     s->mb_intra= 0;
2704                     s->mv[0][0][0] = 0;
2705                     s->mv[0][0][1] = 0;
2706                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2707                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2708                 }
2709                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2710                     s->mv_dir = MV_DIR_FORWARD;
2711                     s->mv_type = MV_TYPE_8X8;
2712                     s->mb_intra= 0;
2713                     for(i=0; i<4; i++){
2714                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2715                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2716                     }
2717                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2718                                  &dmin, &next_block, 0, 0);
2719                 }
2720                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2721                     s->mv_dir = MV_DIR_FORWARD;
2722                     s->mv_type = MV_TYPE_16X16;
2723                     s->mb_intra= 0;
2724                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2725                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2726                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2727                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2728                 }
2729                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2730                     s->mv_dir = MV_DIR_BACKWARD;
2731                     s->mv_type = MV_TYPE_16X16;
2732                     s->mb_intra= 0;
2733                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2734                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2735                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2736                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2737                 }
2738                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2739                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2740                     s->mv_type = MV_TYPE_16X16;
2741                     s->mb_intra= 0;
2742                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2743                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2744                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2745                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2746                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2747                                  &dmin, &next_block, 0, 0);
2748                 }
2749                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2750                     s->mv_dir = MV_DIR_FORWARD;
2751                     s->mv_type = MV_TYPE_FIELD;
2752                     s->mb_intra= 0;
2753                     for(i=0; i<2; i++){
2754                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2755                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2756                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2757                     }
2758                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2759                                  &dmin, &next_block, 0, 0);
2760                 }
2761                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2762                     s->mv_dir = MV_DIR_BACKWARD;
2763                     s->mv_type = MV_TYPE_FIELD;
2764                     s->mb_intra= 0;
2765                     for(i=0; i<2; i++){
2766                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2767                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2768                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2769                     }
2770                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2771                                  &dmin, &next_block, 0, 0);
2772                 }
2773                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2774                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2775                     s->mv_type = MV_TYPE_FIELD;
2776                     s->mb_intra= 0;
2777                     for(dir=0; dir<2; dir++){
2778                         for(i=0; i<2; i++){
2779                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2780                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2781                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2782                         }
2783                     }
2784                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2785                                  &dmin, &next_block, 0, 0);
2786                 }
2787                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2788                     s->mv_dir = 0;
2789                     s->mv_type = MV_TYPE_16X16;
2790                     s->mb_intra= 1;
2791                     s->mv[0][0][0] = 0;
2792                     s->mv[0][0][1] = 0;
2793                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2794                                  &dmin, &next_block, 0, 0);
2795                     if(s->h263_pred || s->h263_aic){
2796                         if(best_s.mb_intra)
2797                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2798                         else
2799                             ff_clean_intra_table_entries(s); //old mode?
2800                     }
2801                 }
2802
2803                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2804                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2805                         const int last_qp= backup_s.qscale;
2806                         int qpi, qp, dc[6];
2807                         int16_t ac[6][16];
2808                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2809                         static const int dquant_tab[4]={-1,1,-2,2};
2810
2811                         assert(backup_s.dquant == 0);
2812
2813                         //FIXME intra
2814                         s->mv_dir= best_s.mv_dir;
2815                         s->mv_type = MV_TYPE_16X16;
2816                         s->mb_intra= best_s.mb_intra;
2817                         s->mv[0][0][0] = best_s.mv[0][0][0];
2818                         s->mv[0][0][1] = best_s.mv[0][0][1];
2819                         s->mv[1][0][0] = best_s.mv[1][0][0];
2820                         s->mv[1][0][1] = best_s.mv[1][0][1];
2821
2822                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2823                         for(; qpi<4; qpi++){
2824                             int dquant= dquant_tab[qpi];
2825                             qp= last_qp + dquant;
2826                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2827                                 continue;
2828                             backup_s.dquant= dquant;
2829                             if(s->mb_intra && s->dc_val[0]){
2830                                 for(i=0; i<6; i++){
2831                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2832                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2833                                 }
2834                             }
2835
2836                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2837                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2838                             if(best_s.qscale != qp){
2839                                 if(s->mb_intra && s->dc_val[0]){
2840                                     for(i=0; i<6; i++){
2841                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2842                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2843                                     }
2844                                 }
2845                             }
2846                         }
2847                     }
2848                 }
2849                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2850                     int mx= s->b_direct_mv_table[xy][0];
2851                     int my= s->b_direct_mv_table[xy][1];
2852
2853                     backup_s.dquant = 0;
2854                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2855                     s->mb_intra= 0;
2856                     ff_mpeg4_set_direct_mv(s, mx, my);
2857                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2858                                  &dmin, &next_block, mx, my);
2859                 }
2860                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2861                     backup_s.dquant = 0;
2862                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2863                     s->mb_intra= 0;
2864                     ff_mpeg4_set_direct_mv(s, 0, 0);
2865                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2866                                  &dmin, &next_block, 0, 0);
2867                 }
2868                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2869                     int coded=0;
2870                     for(i=0; i<6; i++)
2871                         coded |= s->block_last_index[i];
2872                     if(coded){
2873                         int mx,my;
2874                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2875                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2876                             mx=my=0; //FIXME find the one we actually used
2877                             ff_mpeg4_set_direct_mv(s, mx, my);
2878                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2879                             mx= s->mv[1][0][0];
2880                             my= s->mv[1][0][1];
2881                         }else{
2882                             mx= s->mv[0][0][0];
2883                             my= s->mv[0][0][1];
2884                         }
2885
2886                         s->mv_dir= best_s.mv_dir;
2887                         s->mv_type = best_s.mv_type;
2888                         s->mb_intra= 0;
2889 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2890                         s->mv[0][0][1] = best_s.mv[0][0][1];
2891                         s->mv[1][0][0] = best_s.mv[1][0][0];
2892                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2893                         backup_s.dquant= 0;
2894                         s->skipdct=1;
2895                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2896                                         &dmin, &next_block, mx, my);
2897                         s->skipdct=0;
2898                     }
2899                 }
2900
2901                 s->current_picture.qscale_table[xy] = best_s.qscale;
2902
2903                 copy_context_after_encode(s, &best_s, -1);
2904
2905                 pb_bits_count= put_bits_count(&s->pb);
2906                 flush_put_bits(&s->pb);
2907                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2908                 s->pb= backup_s.pb;
2909
2910                 if(s->data_partitioning){
2911                     pb2_bits_count= put_bits_count(&s->pb2);
2912                     flush_put_bits(&s->pb2);
2913                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2914                     s->pb2= backup_s.pb2;
2915
2916                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2917                     flush_put_bits(&s->tex_pb);
2918                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2919                     s->tex_pb= backup_s.tex_pb;
2920                 }
2921                 s->last_bits= put_bits_count(&s->pb);
2922
2923                 if (CONFIG_H263_ENCODER &&
2924                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2925                     ff_h263_update_motion_val(s);
2926
2927                 if(next_block==0){ //FIXME 16 vs linesize16
2928                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2929                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2930                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2931                 }
2932
2933                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2934                     ff_MPV_decode_mb(s, s->block);
2935             } else {
2936                 int motion_x = 0, motion_y = 0;
2937                 s->mv_type=MV_TYPE_16X16;
2938                 // only one MB-Type possible
2939
2940                 switch(mb_type){
2941                 case CANDIDATE_MB_TYPE_INTRA:
2942                     s->mv_dir = 0;
2943                     s->mb_intra= 1;
2944                     motion_x= s->mv[0][0][0] = 0;
2945                     motion_y= s->mv[0][0][1] = 0;
2946                     break;
2947                 case CANDIDATE_MB_TYPE_INTER:
2948                     s->mv_dir = MV_DIR_FORWARD;
2949                     s->mb_intra= 0;
2950                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2951                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2952                     break;
2953                 case CANDIDATE_MB_TYPE_INTER_I:
2954                     s->mv_dir = MV_DIR_FORWARD;
2955                     s->mv_type = MV_TYPE_FIELD;
2956                     s->mb_intra= 0;
2957                     for(i=0; i<2; i++){
2958                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2959                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2960                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2961                     }
2962                     break;
2963                 case CANDIDATE_MB_TYPE_INTER4V:
2964                     s->mv_dir = MV_DIR_FORWARD;
2965                     s->mv_type = MV_TYPE_8X8;
2966                     s->mb_intra= 0;
2967                     for(i=0; i<4; i++){
2968                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2969                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2970                     }
2971                     break;
2972                 case CANDIDATE_MB_TYPE_DIRECT:
2973                     if (CONFIG_MPEG4_ENCODER) {
2974                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2975                         s->mb_intra= 0;
2976                         motion_x=s->b_direct_mv_table[xy][0];
2977                         motion_y=s->b_direct_mv_table[xy][1];
2978                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2979                     }
2980                     break;
2981                 case CANDIDATE_MB_TYPE_DIRECT0:
2982                     if (CONFIG_MPEG4_ENCODER) {
2983                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2984                         s->mb_intra= 0;
2985                         ff_mpeg4_set_direct_mv(s, 0, 0);
2986                     }
2987                     break;
2988                 case CANDIDATE_MB_TYPE_BIDIR:
2989                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2990                     s->mb_intra= 0;
2991                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2992                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2993                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2994                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2995                     break;
2996                 case CANDIDATE_MB_TYPE_BACKWARD:
2997                     s->mv_dir = MV_DIR_BACKWARD;
2998                     s->mb_intra= 0;
2999                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3000                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3001                     break;
3002                 case CANDIDATE_MB_TYPE_FORWARD:
3003                     s->mv_dir = MV_DIR_FORWARD;
3004                     s->mb_intra= 0;
3005                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3006                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3007                     break;
3008                 case CANDIDATE_MB_TYPE_FORWARD_I:
3009                     s->mv_dir = MV_DIR_FORWARD;
3010                     s->mv_type = MV_TYPE_FIELD;
3011                     s->mb_intra= 0;
3012                     for(i=0; i<2; i++){
3013                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3014                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3015                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3016                     }
3017                     break;
3018                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3019                     s->mv_dir = MV_DIR_BACKWARD;
3020                     s->mv_type = MV_TYPE_FIELD;
3021                     s->mb_intra= 0;
3022                     for(i=0; i<2; i++){
3023                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3024                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3025                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3026                     }
3027                     break;
3028                 case CANDIDATE_MB_TYPE_BIDIR_I:
3029                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3030                     s->mv_type = MV_TYPE_FIELD;
3031                     s->mb_intra= 0;
3032                     for(dir=0; dir<2; dir++){
3033                         for(i=0; i<2; i++){
3034                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3035                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3036                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3037                         }
3038                     }
3039                     break;
3040                 default:
3041                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3042                 }
3043
3044                 encode_mb(s, motion_x, motion_y);
3045
3046                 // RAL: Update last macroblock type
3047                 s->last_mv_dir = s->mv_dir;
3048
3049                 if (CONFIG_H263_ENCODER &&
3050                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3051                     ff_h263_update_motion_val(s);
3052
3053                 ff_MPV_decode_mb(s, s->block);
3054             }
3055
3056             /* clean the MV table in IPS frames for direct mode in B frames */
3057             if(s->mb_intra /* && I,P,S_TYPE */){
3058                 s->p_mv_table[xy][0]=0;
3059                 s->p_mv_table[xy][1]=0;
3060             }
3061
3062             if(s->flags&CODEC_FLAG_PSNR){
3063                 int w= 16;
3064                 int h= 16;
3065
3066                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3067                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3068
3069                 s->current_picture.f->error[0] += sse(
3070                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3071                     s->dest[0], w, h, s->linesize);
3072                 s->current_picture.f->error[1] += sse(
3073                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3074                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3075                 s->current_picture.f->error[2] += sse(
3076                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3077                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3078             }
3079             if(s->loop_filter){
3080                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3081                     ff_h263_loop_filter(s);
3082             }
3083             av_dlog(s->avctx, "MB %d %d bits\n",
3084                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3085         }
3086     }
3087
3088     //not beautiful here but we must write it before flushing so it has to be here
3089     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3090         ff_msmpeg4_encode_ext_header(s);
3091
3092     write_slice_end(s);
3093
3094     /* Send the last GOB if RTP */
3095     if (s->avctx->rtp_callback) {
3096         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3097         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3098         /* Call the RTP callback to send the last GOB */
3099         emms_c();
3100         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3101     }
3102
3103     return 0;
3104 }
3105
/*
 * Fold one accumulator field of a slice context into the main context:
 * adds src->field to dst->field, then clears src->field.
 * Variables named dst and src must be in scope where the macro is used.
 * The do { } while (0) wrapper makes "MERGE(x);" a single statement, so it
 * is safe as the body of an unbraced if/else or loop.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3107 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3108     MERGE(me.scene_change_score);
3109     MERGE(me.mc_mb_var_sum_temp);
3110     MERGE(me.mb_var_sum_temp);
3111 }
3112
3113 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3114     int i;
3115
3116     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3117     MERGE(dct_count[1]);
3118     MERGE(mv_bits);
3119     MERGE(i_tex_bits);
3120     MERGE(p_tex_bits);
3121     MERGE(i_count);
3122     MERGE(f_count);
3123     MERGE(b_count);
3124     MERGE(skip_count);
3125     MERGE(misc_bits);
3126     MERGE(er.error_count);
3127     MERGE(padding_bug_score);
3128     MERGE(current_picture.f->error[0]);
3129     MERGE(current_picture.f->error[1]);
3130     MERGE(current_picture.f->error[2]);
3131
3132     if(dst->avctx->noise_reduction){
3133         for(i=0; i<64; i++){
3134             MERGE(dct_error_sum[0][i]);
3135             MERGE(dct_error_sum[1][i]);
3136         }
3137     }
3138
3139     assert(put_bits_count(&src->pb) % 8 ==0);
3140     assert(put_bits_count(&dst->pb) % 8 ==0);
3141     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3142     flush_put_bits(&dst->pb);
3143 }
3144
3145 static int estimate_qp(MpegEncContext *s, int dry_run){
3146     if (s->next_lambda){
3147         s->current_picture_ptr->f->quality =
3148         s->current_picture.f->quality = s->next_lambda;
3149         if(!dry_run) s->next_lambda= 0;
3150     } else if (!s->fixed_qscale) {
3151         s->current_picture_ptr->f->quality =
3152         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3153         if (s->current_picture.f->quality < 0)
3154             return -1;
3155     }
3156
3157     if(s->adaptive_quant){
3158         switch(s->codec_id){
3159         case AV_CODEC_ID_MPEG4:
3160             if (CONFIG_MPEG4_ENCODER)
3161                 ff_clean_mpeg4_qscales(s);
3162             break;
3163         case AV_CODEC_ID_H263:
3164         case AV_CODEC_ID_H263P:
3165         case AV_CODEC_ID_FLV1:
3166             if (CONFIG_H263_ENCODER)
3167                 ff_clean_h263_qscales(s);
3168             break;
3169         default:
3170             ff_init_qscale_tab(s);
3171         }
3172
3173         s->lambda= s->lambda_table[0];
3174         //FIXME broken
3175     }else
3176         s->lambda = s->current_picture.f->quality;
3177     update_qscale(s);
3178     return 0;
3179 }
3180
3181 /* must be called before writing the header */
3182 static void set_frame_distances(MpegEncContext * s){
3183     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3184     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3185
3186     if(s->pict_type==AV_PICTURE_TYPE_B){
3187         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3188         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3189     }else{
3190         s->pp_time= s->time - s->last_non_b_time;
3191         s->last_non_b_time= s->time;
3192         assert(s->picture_number==0 || s->pp_time > 0);
3193     }
3194 }
3195
/**
 * Encode one picture.
 *
 * Steps, in order: set up timing and rounding state, pick a provisional
 * lambda, run motion estimation over all slice contexts (for I pictures:
 * mark every macroblock intra and, unless the qscale is fixed, run
 * mb_var_thread), possibly re-type a P picture as I on a scene change,
 * choose f_code/b_code, run estimate_qp(), write the format-specific
 * picture header and finally run encode_thread over all slice contexts,
 * merging their results back into s.
 *
 * @param s              main encoder context
 * @param picture_number stored in s->picture_number and handed to the
 *                       picture header writers
 * @return 0 on success; -1 if estimate_qp() or ff_init_me() fails, or the
 *         negative value returned by ff_update_duplicate_context()
 */
3196 static int encode_picture(MpegEncContext *s, int picture_number)
3197 {
3198     int i, ret;
3199     int bits;
3200     int context_count = s->slice_context_count;
3201
3202     s->picture_number = picture_number;
3203
3204     /* Reset the average MB variance */
3205     s->me.mb_var_sum_temp    =
3206     s->me.mc_mb_var_sum_temp = 0;
3207
3208     /* we need to initialize some time vars before we can encode b-frames */
3209     // RAL: Condition added for MPEG1VIDEO
3210     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3211         set_frame_distances(s);
3212     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3213         ff_set_mpeg4_time(s);
3214
3215     s->me.scene_change_score=0;
3216
3217 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3218
     /* I pictures set no_rounding from the msmpeg4 version; other non-B
      * pictures toggle it for flipflop_rounding, H.263+ and MPEG-4 */
3219     if(s->pict_type==AV_PICTURE_TYPE_I){
3220         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3221         else                        s->no_rounding=0;
3222     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3223         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3224             s->no_rounding ^= 1;
3225     }
3226
     /* provisional lambda for motion estimation: a dry-run estimate_qp() in
      * pass 2; otherwise, unless CODEC_FLAG_QSCALE is set, the lambda last
      * stored for the picture type (B) or for the last non-B picture type */
3227     if(s->flags & CODEC_FLAG_PASS2){
3228         if (estimate_qp(s,1) < 0)
3229             return -1;
3230         ff_get_2pass_fcode(s);
3231     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3232         if(s->pict_type==AV_PICTURE_TYPE_B)
3233             s->lambda= s->last_lambda_for[s->pict_type];
3234         else
3235             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3236         update_qscale(s);
3237     }
3238
3239     s->mb_intra=0; //for the rate distortion & bit compare functions
     /* context 0 is skipped; NOTE(review): presumably thread_context[0] is s
      * itself - confirm against the slice context setup */
3240     for(i=1; i<context_count; i++){
3241         ret = ff_update_duplicate_context(s->thread_context[i], s);
3242         if (ret < 0)
3243             return ret;
3244     }
3245
3246     if(ff_init_me(s)<0)
3247         return -1;
3248
3249     /* Estimate motion for every MB */
3250     if(s->pict_type != AV_PICTURE_TYPE_I){
         /* scale lambda/lambda2 by me_penalty_compensation/256, rounded */
3251         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3252         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3253         if (s->pict_type != AV_PICTURE_TYPE_B) {
             /* pre-pass: after an I picture when pre_me is set, or always
              * when pre_me == 2 */
3254             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3255                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3256             }
3257         }
3258
3259         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3260     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3261         /* I-Frame */
3262         for(i=0; i<s->mb_stride*s->mb_height; i++)
3263             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3264
3265         if(!s->fixed_qscale){
3266             /* finding spatial complexity for I-frame rate control */
3267             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3268         }
3269     }
     /* collect the ME statistics of the other slice contexts into s */
3270     for(i=1; i<context_count; i++){
3271         merge_context_after_me(s, s->thread_context[i]);
3272     }
3273     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3274     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3275     emms_c();
3276
     /* a P picture whose scene change score exceeds the threshold is
      * re-typed as an I picture with every macroblock marked intra */
3277     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3278         s->pict_type= AV_PICTURE_TYPE_I;
3279         for(i=0; i<s->mb_stride*s->mb_height; i++)
3280             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3281         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3282                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3283     }
3284
     /* unless umvplus is set, choose f_code/b_code from the MV tables and
      * pass the tables to ff_fix_long_mvs() with the chosen code
      * (NOTE(review): presumably that handles vectors which do not fit the
      * code - confirm in the motion estimation code) */
3285     if(!s->umvplus){
3286         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3287             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3288
3289             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3290                 int a,b;
3291                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3292                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3293                 s->f_code= FFMAX3(s->f_code, a, b);
3294             }
3295
3296             ff_fix_long_p_mvs(s);
3297             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3298             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3299                 int j;
3300                 for(i=0; i<2; i++){
3301                     for(j=0; j<2; j++)
3302                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3303                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3304                 }
3305             }
3306         }
3307
3308         if(s->pict_type==AV_PICTURE_TYPE_B){
3309             int a, b;
3310
             /* f_code covers the forward tables, b_code the backward ones;
              * each is the larger of the one-directional and bidir needs */
3311             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3312             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3313             s->f_code = FFMAX(a, b);
3314
3315             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3316             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3317             s->b_code = FFMAX(a, b);
3318
3319             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3320             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3321             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3322             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3323             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3324                 int dir, j;
3325                 for(dir=0; dir<2; dir++){
3326                     for(i=0; i<2; i++){
3327                         for(j=0; j<2; j++){
3328                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3329                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3330                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3331                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3332                         }
3333                     }
3334                 }
3335             }
3336         }
3337     }
3338
     /* final (non dry-run) quantiser decision for this picture */
3339     if (estimate_qp(s, 0) < 0)
3340         return -1;
3341
3342     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3343         s->qscale= 3; //reduce clipping problems
3344
3345     if (s->out_format == FMT_MJPEG) {
3346         /* for mjpeg, we do include qscale in the matrix */
3347         for(i=1;i<64;i++){
3348             int j= s->dsp.idct_permutation[i];
3349
3350             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3351         }
3352         s->y_dc_scale_table=
3353         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3354         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3355         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3356                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
         /* qscale is already folded into intra_matrix above, so it is
          * pinned to 8 from here on */
3357         s->qscale= 8;
3358     }
3359
3360     //FIXME var duplication
3361     s->current_picture_ptr->f->key_frame =
3362     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3363     s->current_picture_ptr->f->pict_type =
3364     s->current_picture.f->pict_type = s->pict_type;
3365
3366     if (s->current_picture.f->key_frame)
3367         s->picture_in_gop_number=0;
3368
     /* write the picture header of the output format; header_bits is the
      * number of bits this adds to s->pb */
3369     s->last_bits= put_bits_count(&s->pb);
3370     switch(s->out_format) {
3371     case FMT_MJPEG:
3372         if (CONFIG_MJPEG_ENCODER)
3373             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3374                                            s->intra_matrix);
3375         break;
3376     case FMT_H261:
3377         if (CONFIG_H261_ENCODER)
3378             ff_h261_encode_picture_header(s, picture_number);
3379         break;
3380     case FMT_H263:
3381         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3382             ff_wmv2_encode_picture_header(s, picture_number);
3383         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3384             ff_msmpeg4_encode_picture_header(s, picture_number);
3385         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3386             ff_mpeg4_encode_picture_header(s, picture_number);
3387         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3388             ff_rv10_encode_picture_header(s, picture_number);
3389         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3390             ff_rv20_encode_picture_header(s, picture_number);
3391         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3392             ff_flv_encode_picture_header(s, picture_number);
3393         else if (CONFIG_H263_ENCODER)
3394             ff_h263_encode_picture_header(s, picture_number);
3395         break;
3396     case FMT_MPEG1:
3397         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3398             ff_mpeg1_encode_picture_header(s, picture_number);
3399         break;
3400     default:
3401         assert(0);
3402     }
3403     bits= put_bits_count(&s->pb);
3404     s->header_bits= bits - s->last_bits;
3405
     /* refresh the other slice contexts, encode all slices, then merge the
      * statistics and bitstreams of contexts 1..n-1 back into s */
3406     for(i=1; i<context_count; i++){
3407         update_duplicate_context_after_me(s->thread_context[i], s);
3408     }
3409     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3410     for(i=1; i<context_count; i++){
3411         merge_context_after_encode(s, s->thread_context[i]);
3412     }
3413     emms_c();
3414     return 0;
3415 }
3416
3417 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3418     const int intra= s->mb_intra;
3419     int i;
3420
3421     s->dct_count[intra]++;
3422
3423     for(i=0; i<64; i++){
3424         int level= block[i];
3425
3426         if(level){
3427             if(level>0){
3428                 s->dct_error_sum[intra][i] += level;
3429                 level -= s->dct_offset[intra][i];
3430                 if(level<0) level=0;
3431             }else{
3432                 s->dct_error_sum[intra][i] -= level;
3433                 level += s->dct_offset[intra][i];
3434                 if(level>0) level=0;
3435             }
3436             block[i]= level;
3437         }
3438     }
3439 }
3440
/**
 * Rate-distortion optimised ("trellis") quantisation of one block.
 *
 * The block is transformed with s->dsp.fdct (and passed through
 * s->denoise_dct when s->dct_error_sum is set). For each coefficient up to
 * the last one that survives plain quantisation, one or two candidate
 * levels are kept; a dynamic programme over (run, level) pairs then picks
 * the combination with the lowest distortion + lambda * bits, using the AC
 * VLC length tables of the context.
 *
 * @param s        encoder context
 * @param block    in: data handed to s->dsp.fdct; out: quantised levels,
 *                 stored at the positions given by intra_scantable.permutated
 * @param n        block index; for intra blocks without h263_aic, n < 4
 *                 selects y_dc_scale and other values c_dc_scale; also
 *                 indexes s->coded_score
 * @param qscale   quantiser; selects the row of q_intra_matrix/q_inter_matrix
 * @param overflow set to nonzero when a level may exceed s->max_qcoeff
 * @return scan index of the last non-zero coefficient; a value below the
 *         first coded index (1 for intra, 0 for inter) means that no
 *         coefficient was kept
 */
3441 static int dct_quantize_trellis_c(MpegEncContext *s,
3442                                   int16_t *block, int n,
3443                                   int qscale, int *overflow){
3444     const int *qmat;
3445     const uint8_t *scantable= s->intra_scantable.scantable;
3446     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3447     int max=0;
3448     unsigned int threshold1, threshold2;
3449     int bias=0;
3450     int run_tab[65];
3451     int level_tab[65];
3452     int score_tab[65];
3453     int survivor[65];
3454     int survivor_count;
3455     int last_run=0;
3456     int last_level=0;
3457     int last_score= 0;
3458     int last_i;
3459     int coeff[2][64];
3460     int coeff_count[64];
3461     int qmul, qadd, start_i, last_non_zero, i, dc;
3462     const int esc_length= s->ac_esc_length;
3463     uint8_t * length;
3464     uint8_t * last_length;
3465     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3466
3467     s->dsp.fdct (block);
3468
3469     if(s->dct_error_sum)
3470         s->denoise_dct(s, block);
3471     qmul= qscale*16;
3472     qadd= ((qscale-1)|1)*8;
3473
3474     if (s->mb_intra) {
3475         int q;
3476         if (!s->h263_aic) {
3477             if (n < 4)
3478                 q = s->y_dc_scale;
3479             else
3480                 q = s->c_dc_scale;
3481             q = q << 3;
3482         } else{
3483             /* For AIC we skip quant/dequant of INTRADC */
3484             q = 1 << 3;
3485             qadd=0;
3486         }
3487
3488         /* note: block[0] is assumed to be positive */
3489         block[0] = (block[0] + (q >> 1)) / q;
3490         start_i = 1;
3491         last_non_zero = 0;
3492         qmat = s->q_intra_matrix[qscale];
3493         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3494             bias= 1<<(QMAT_SHIFT-1);
3495         length     = s->intra_ac_vlc_length;
3496         last_length= s->intra_ac_vlc_last_length;
3497     } else {
3498         start_i = 0;
3499         last_non_zero = -1;
3500         qmat = s->q_inter_matrix[qscale];
3501         length     = s->inter_ac_vlc_length;
3502         last_length= s->inter_ac_vlc_last_length;
3503     }
3504     last_i= start_i;
3505
     /* (unsigned)(level + threshold1) > threshold2 is true exactly when
      * level lies outside [-threshold1, threshold1] */
3506     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3507     threshold2= (threshold1<<1);
3508
     /* scan backwards for the last coefficient that passes the threshold */
3509     for(i=63; i>=start_i; i--) {
3510         const int j = scantable[i];
3511         int level = block[j] * qmat[j];
3512
3513         if(((unsigned)(level+threshold1))>threshold2){
3514             last_non_zero = i;
3515             break;
3516         }
3517     }
3518
     /* candidate levels per position: the quantised level and the next one
      * towards zero (only the former when its magnitude is 1); positions
      * below the threshold get a single candidate of magnitude 1 */
3519     for(i=start_i; i<=last_non_zero; i++) {
3520         const int j = scantable[i];
3521         int level = block[j] * qmat[j];
3522
3523 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3524 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3525         if(((unsigned)(level+threshold1))>threshold2){
3526             if(level>0){
3527                 level= (bias + level)>>QMAT_SHIFT;
3528                 coeff[0][i]= level;
3529                 coeff[1][i]= level-1;
3530 //                coeff[2][k]= level-2;
3531             }else{
3532                 level= (bias - level)>>QMAT_SHIFT;
3533                 coeff[0][i]= -level;
3534                 coeff[1][i]= -level+1;
3535 //                coeff[2][k]= -level+2;
3536             }
3537             coeff_count[i]= FFMIN(level, 2);
3538             assert(coeff_count[i]);
3539             max |=level;
3540         }else{
3541             coeff[0][i]= (level>>31)|1;
3542             coeff_count[i]= 1;
3543         }
3544     }
3545
3546     *overflow= s->max_qcoeff < max; //overflow might have happened
3547
     /* nothing passed the threshold: clear the coded coefficients and stop */
3548     if(last_non_zero < start_i){
3549         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3550         return last_non_zero;
3551     }
3552
     /* score_tab[k] holds the best score found for coding everything before
      * scan position k; survivor[] lists the positions still considered as
      * the end of the previous (run, level) pair */
3553     score_tab[start_i]= 0;
3554     survivor[0]= start_i;
3555     survivor_count= 1;
3556
3557     for(i=start_i; i<=last_non_zero; i++){
3558         int level_index, j, zero_distortion;
3559         int dct_coeff= FFABS(block[ scantable[i] ]);
3560         int best_score=256*256*256*120;
3561
3562         if (s->dsp.fdct == ff_fdct_ifast)
3563             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3564         zero_distortion= dct_coeff*dct_coeff;
3565
3566         for(level_index=0; level_index < coeff_count[i]; level_index++){
3567             int distortion;
3568             int level= coeff[level_index][i];
3569             const int alevel= FFABS(level);
3570             int unquant_coeff;
3571
3572             assert(level);
3573
             /* reconstruct the value this level would produce */
3574             if(s->out_format == FMT_H263){
3575                 unquant_coeff= alevel*qmul + qadd;
3576             }else{ //MPEG1
3577                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3578                 if(s->mb_intra){
3579                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3580                         unquant_coeff =   (unquant_coeff - 1) | 1;
3581                 }else{
3582                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3583                         unquant_coeff =   (unquant_coeff - 1) | 1;
3584                 }
3585                 unquant_coeff<<= 3;
3586             }
3587
             /* distortion is measured relative to coding the coefficient as
              * zero; levels in [-64, 63] are costed from the VLC length
              * tables, all others with esc_length */
3588             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3589             level+=64;
3590             if((level&(~127)) == 0){
3591                 for(j=survivor_count-1; j>=0; j--){
3592                     int run= i - survivor[j];
3593                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3594                     score += score_tab[i-run];
3595
3596                     if(score < best_score){
3597                         best_score= score;
3598                         run_tab[i+1]= run;
3599                         level_tab[i+1]= level-64;
3600                     }
3601                 }
3602
                 /* for FMT_H263 the last coefficient is costed with
                  * last_length, so the best end point is tracked here */
3603                 if(s->out_format == FMT_H263){
3604                     for(j=survivor_count-1; j>=0; j--){
3605                         int run= i - survivor[j];
3606                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3607                         score += score_tab[i-run];
3608                         if(score < last_score){
3609                             last_score= score;
3610                             last_run= run;
3611                             last_level= level-64;
3612                             last_i= i+1;
3613                         }
3614                     }
3615                 }
3616             }else{
3617                 distortion += esc_length*lambda;
3618                 for(j=survivor_count-1; j>=0; j--){
3619                     int run= i - survivor[j];
3620                     int score= distortion + score_tab[i-run];
3621
3622                     if(score < best_score){
3623                         best_score= score;
3624                         run_tab[i+1]= run;
3625                         level_tab[i+1]= level-64;
3626                     }
3627                 }
3628
3629                 if(s->out_format == FMT_H263){
3630                   for(j=survivor_count-1; j>=0; j--){
3631                         int run= i - survivor[j];
3632                         int score= distortion + score_tab[i-run];
3633                         if(score < last_score){
3634                             last_score= score;
3635                             last_run= run;
3636                             last_level= level-64;
3637                             last_i= i+1;
3638                         }
3639                     }
3640                 }
3641             }
3642         }
3643
3644         score_tab[i+1]= best_score;
3645
         /* drop trailing survivors whose score is worse than the new best
          * (with a tolerance of lambda when last_non_zero > 27) */
3646         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3647         if(last_non_zero <= 27){
3648             for(; survivor_count; survivor_count--){
3649                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3650                     break;
3651             }
3652         }else{
3653             for(; survivor_count; survivor_count--){
3654                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3655                     break;
3656             }
3657         }
3658
3659         survivor[ survivor_count++ ]= i+1;
3660     }
3661
     /* formats other than FMT_H263: choose the end position afterwards from
      * score_tab, adding 2*lambda for every position except 0 */
3662     if(s->out_format != FMT_H263){
3663         last_score= 256*256*256*120;
3664         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3665             int score= score_tab[i];
3666             if(i) score += lambda*2; //FIXME exacter?
3667
3668             if(score < last_score){
3669                 last_score= score;
3670                 last_i= i;
3671                 last_level= level_tab[i];
3672                 last_run= run_tab[i];
3673             }
3674         }
3675     }
3676
3677     s->coded_score[n] = last_score;
3678
     /* remember |block[0]| before the coefficients are cleared; it is used
      * by the single-coefficient case below */
3679     dc= FFABS(block[0]);
3680     last_non_zero= last_i - 1;
3681     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3682
3683     if(last_non_zero < start_i)
3684         return last_non_zero;
3685
     /* only the first coefficient of a non-intra block is left: decide
      * between its candidate levels and dropping it altogether */
3686     if(last_non_zero == 0 && start_i == 0){
3687         int best_level= 0;
3688         int best_score= dc * dc;
3689
3690         for(i=0; i<coeff_count[0]; i++){
3691             int level= coeff[i][0];
3692             int alevel= FFABS(level);
3693             int unquant_coeff, score, distortion;
3694
3695             if(s->out_format == FMT_H263){
3696                     unquant_coeff= (alevel*qmul + qadd)>>3;
3697             }else{ //MPEG1
3698                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3699                     unquant_coeff =   (unquant_coeff - 1) | 1;
3700             }
3701             unquant_coeff = (unquant_coeff + 4) >> 3;
3702             unquant_coeff<<= 3 + 3;
3703
3704             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3705             level+=64;
3706             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3707             else                    score= distortion + esc_length*lambda;
3708
3709             if(score < best_score){
3710                 best_score= score;
3711                 best_level= level - 64;
3712             }
3713         }
3714         block[0]= best_level;
3715         s->coded_score[n] = best_score - dc*dc;
3716         if(best_level == 0) return -1;
3717         else                return last_non_zero;
3718     }
3719
     /* walk the chosen path backwards via run_tab/level_tab and store the
      * selected levels in the block */
3720     i= last_i;
3721     assert(last_level);
3722
3723     block[ perm_scantable[last_non_zero] ]= last_level;
3724     i -= last_run + 1;
3725
3726     for(; i>start_i; i -= run_tab[i] + 1){
3727         block[ perm_scantable[i-1] ]= level_tab[i];
3728     }
3729
3730     return last_non_zero;
3731 }
3732
3733 //#define REFINE_STATS 1
3734 static int16_t basis[64][64];
3735
3736 static void build_basis(uint8_t *perm){
3737     int i, j, x, y;
3738     emms_c();
3739     for(i=0; i<8; i++){
3740         for(j=0; j<8; j++){
3741             for(y=0; y<8; y++){
3742                 for(x=0; x<8; x++){
3743                     double s= 0.25*(1<<BASIS_SHIFT);
3744                     int index= 8*i + j;
3745                     int perm_index= perm[index];
3746                     if(i==0) s*= sqrt(0.5);
3747                     if(j==0) s*= sqrt(0.5);
3748                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3749                 }
3750             }
3751         }
3752     }
3753 }
3754
3755 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3756                         int16_t *block, int16_t *weight, int16_t *orig,
3757                         int n, int qscale){
3758     int16_t rem[64];
3759     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3760     const uint8_t *scantable= s->intra_scantable.scantable;
3761     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3762 //    unsigned int threshold1, threshold2;
3763 //    int bias=0;
3764     int run_tab[65];
3765     int prev_run=0;
3766     int prev_level=0;
3767     int qmul, qadd, start_i, last_non_zero, i, dc;
3768     uint8_t * length;
3769     uint8_t * last_length;
3770     int lambda;
3771     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3772 #ifdef REFINE_STATS
3773 static int count=0;
3774 static int after_last=0;
3775 static int to_zero=0;
3776 static int from_zero=0;
3777 static int raise=0;
3778 static int lower=0;
3779 static int messed_sign=0;
3780 #endif
3781
3782     if(basis[0][0] == 0)
3783         build_basis(s->dsp.idct_permutation);
3784
3785     qmul= qscale*2;
3786     qadd= (qscale-1)|1;
3787     if (s->mb_intra) {
3788         if (!s->h263_aic) {
3789             if (n < 4)
3790                 q = s->y_dc_scale;
3791             else
3792                 q = s->c_dc_scale;
3793         } else{
3794             /* For AIC we skip quant/dequant of INTRADC */
3795             q = 1;
3796             qadd=0;
3797         }
3798         q <<= RECON_SHIFT-3;
3799         /* note: block[0] is assumed to be positive */
3800         dc= block[0]*q;
3801 //        block[0] = (block[0] + (q >> 1)) / q;
3802         start_i = 1;
3803 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3804 //            bias= 1<<(QMAT_SHIFT-1);
3805         length     = s->intra_ac_vlc_length;
3806         last_length= s->intra_ac_vlc_last_length;
3807     } else {
3808         dc= 0;
3809         start_i = 0;
3810         length     = s->inter_ac_vlc_length;
3811         last_length= s->inter_ac_vlc_last_length;
3812     }
3813     last_non_zero = s->block_last_index[n];
3814
3815 #ifdef REFINE_STATS
3816 {START_TIMER
3817 #endif
3818     dc += (1<<(RECON_SHIFT-1));
3819     for(i=0; i<64; i++){
3820         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3821     }
3822 #ifdef REFINE_STATS
3823 STOP_TIMER("memset rem[]")}
3824 #endif
3825     sum=0;
3826     for(i=0; i<64; i++){
3827         int one= 36;
3828         int qns=4;
3829         int w;
3830
3831         w= FFABS(weight[i]) + qns*one;
3832         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3833
3834         weight[i] = w;
3835 //        w=weight[i] = (63*qns + (w/2)) / w;
3836
3837         assert(w>0);
3838         assert(w<(1<<6));
3839         sum += w*w;
3840     }
3841     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3842 #ifdef REFINE_STATS
3843 {START_TIMER
3844 #endif
3845     run=0;
3846     rle_index=0;
3847     for(i=start_i; i<=last_non_zero; i++){
3848         int j= perm_scantable[i];
3849         const int level= block[j];
3850         int coeff;
3851
3852         if(level){
3853             if(level<0) coeff= qmul*level - qadd;
3854             else        coeff= qmul*level + qadd;
3855             run_tab[rle_index++]=run;
3856             run=0;
3857
3858             s->dsp.add_8x8basis(rem, basis[j], coeff);
3859         }else{
3860             run++;
3861         }
3862     }
3863 #ifdef REFINE_STATS
3864 if(last_non_zero>0){
3865 STOP_TIMER("init rem[]")
3866 }
3867 }
3868
3869 {START_TIMER
3870 #endif
3871     for(;;){
3872         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3873         int best_coeff=0;
3874         int best_change=0;
3875         int run2, best_unquant_change=0, analyze_gradient;
3876 #ifdef REFINE_STATS
3877 {START_TIMER
3878 #endif
3879         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3880
3881         if(analyze_gradient){
3882 #ifdef REFINE_STATS
3883 {START_TIMER
3884 #endif
3885             for(i=0; i<64; i++){
3886                 int w= weight[i];
3887
3888                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3889             }
3890 #ifdef REFINE_STATS
3891 STOP_TIMER("rem*w*w")}
3892 {START_TIMER
3893 #endif
3894             s->dsp.fdct(d1);
3895 #ifdef REFINE_STATS
3896 STOP_TIMER("dct")}
3897 #endif
3898         }
3899
3900         if(start_i){
3901             const int level= block[0];
3902             int change, old_coeff;
3903
3904             assert(s->mb_intra);
3905
3906             old_coeff= q*level;
3907
3908             for(change=-1; change<=1; change+=2){
3909                 int new_level= level + change;
3910                 int score, new_coeff;
3911
3912                 new_coeff= q*new_level;
3913                 if(new_coeff >= 2048 || new_coeff < 0)
3914                     continue;
3915
3916                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3917                 if(score<best_score){
3918                     best_score= score;
3919                     best_coeff= 0;
3920                     best_change= change;
3921                     best_unquant_change= new_coeff - old_coeff;
3922                 }
3923             }
3924         }
3925
3926         run=0;
3927         rle_index=0;
3928         run2= run_tab[rle_index++];
3929         prev_level=0;
3930         prev_run=0;
3931
3932         for(i=start_i; i<64; i++){
3933             int j= perm_scantable[i];
3934             const int level= block[j];
3935             int change, old_coeff;
3936
3937             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3938                 break;
3939
3940             if(level){
3941                 if(level<0) old_coeff= qmul*level - qadd;
3942                 else        old_coeff= qmul*level + qadd;
3943                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3944             }else{
3945                 old_coeff=0;
3946                 run2--;
3947                 assert(run2>=0 || i >= last_non_zero );
3948             }
3949
3950             for(change=-1; change<=1; change+=2){
3951                 int new_level= level + change;
3952                 int score, new_coeff, unquant_change;
3953
3954                 score=0;
3955                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3956                    continue;
3957
3958                 if(new_level){
3959                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3960                     else            new_coeff= qmul*new_level + qadd;
3961                     if(new_coeff >= 2048 || new_coeff <= -2048)
3962                         continue;
3963                     //FIXME check for overflow
3964
3965                     if(level){
3966                         if(level < 63 && level > -63){
3967                             if(i < last_non_zero)
3968                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3969                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3970                             else
3971                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3972                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3973                         }
3974                     }else{
3975                         assert(FFABS(new_level)==1);
3976
3977                         if(analyze_gradient){
3978                             int g= d1[ scantable[i] ];
3979                             if(g && (g^new_level) >= 0)
3980                                 continue;
3981                         }
3982
3983                         if(i < last_non_zero){
3984                             int next_i= i + run2 + 1;
3985                             int next_level= block[ perm_scantable[next_i] ] + 64;
3986
3987                             if(next_level&(~127))
3988                                 next_level= 0;
3989
3990                             if(next_i < last_non_zero)
3991                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3992                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3993                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3994                             else
3995                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3996                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3997                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3998                         }else{
3999                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4000                             if(prev_level){
4001                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4002                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4003                             }
4004                         }
4005                     }
4006                 }else{
4007                     new_coeff=0;
4008                     assert(FFABS(level)==1);
4009
4010                     if(i < last_non_zero){
4011                         int next_i= i + run2 + 1;
4012                         int next_level= block[ perm_scantable[next_i] ] + 64;
4013
4014                         if(next_level&(~127))
4015                             next_level= 0;
4016
4017                         if(next_i < last_non_zero)
4018                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4019                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4020                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4021                         else
4022                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4023                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4024                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4025                     }else{
4026                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4027                         if(prev_level){
4028                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4029                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4030                         }
4031                     }
4032                 }
4033
4034                 score *= lambda;
4035
4036                 unquant_change= new_coeff - old_coeff;
4037                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4038
4039                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4040                 if(score<best_score){
4041                     best_score= score;
4042                     best_coeff= i;
4043                     best_change= change;
4044                     best_unquant_change= unquant_change;
4045                 }
4046             }
4047             if(level){
4048                 prev_level= level + 64;
4049                 if(prev_level&(~127))
4050                     prev_level= 0;
4051                 prev_run= run;
4052                 run=0;
4053             }else{
4054                 run++;
4055             }
4056         }
4057 #ifdef REFINE_STATS
4058 STOP_TIMER("iterative step")}
4059 #endif
4060
4061         if(best_change){
4062             int j= perm_scantable[ best_coeff ];
4063
4064             block[j] += best_change;
4065
4066             if(best_coeff > last_non_zero){
4067                 last_non_zero= best_coeff;
4068                 assert(block[j]);
4069 #ifdef REFINE_STATS
4070 after_last++;
4071 #endif
4072             }else{
4073 #ifdef REFINE_STATS
4074 if(block[j]){
4075     if(block[j] - best_change){
4076         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4077             raise++;
4078         }else{
4079             lower++;
4080         }
4081     }else{
4082         from_zero++;
4083     }
4084 }else{
4085     to_zero++;
4086 }
4087 #endif
4088                 for(; last_non_zero>=start_i; last_non_zero--){
4089                     if(block[perm_scantable[last_non_zero]])
4090                         break;
4091                 }
4092             }
4093 #ifdef REFINE_STATS
4094 count++;
4095 if(256*256*256*64 % count == 0){
4096     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4097 }
4098 #endif
4099             run=0;
4100             rle_index=0;
4101             for(i=start_i; i<=last_non_zero; i++){
4102                 int j= perm_scantable[i];
4103                 const int level= block[j];
4104
4105                  if(level){
4106                      run_tab[rle_index++]=run;
4107                      run=0;
4108                  }else{
4109                      run++;
4110                  }
4111             }
4112
4113             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4114         }else{
4115             break;
4116         }
4117     }
4118 #ifdef REFINE_STATS
4119 if(last_non_zero>0){
4120 STOP_TIMER("iterative search")
4121 }
4122 }
4123 #endif
4124
4125     return last_non_zero;
4126 }
4127
4128 int ff_dct_quantize_c(MpegEncContext *s,
4129                         int16_t *block, int n,
4130                         int qscale, int *overflow)
4131 {
4132     int i, j, level, last_non_zero, q, start_i;
4133     const int *qmat;
4134     const uint8_t *scantable= s->intra_scantable.scantable;
4135     int bias;
4136     int max=0;
4137     unsigned int threshold1, threshold2;
4138
4139     s->dsp.fdct (block);
4140
4141     if(s->dct_error_sum)
4142         s->denoise_dct(s, block);
4143
4144     if (s->mb_intra) {
4145         if (!s->h263_aic) {
4146             if (n < 4)
4147                 q = s->y_dc_scale;
4148             else
4149                 q = s->c_dc_scale;
4150             q = q << 3;
4151         } else
4152             /* For AIC we skip quant/dequant of INTRADC */
4153             q = 1 << 3;
4154
4155         /* note: block[0] is assumed to be positive */
4156         block[0] = (block[0] + (q >> 1)) / q;
4157         start_i = 1;
4158         last_non_zero = 0;
4159         qmat = s->q_intra_matrix[qscale];
4160         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4161     } else {
4162         start_i = 0;
4163         last_non_zero = -1;
4164         qmat = s->q_inter_matrix[qscale];
4165         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4166     }
4167     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4168     threshold2= (threshold1<<1);
4169     for(i=63;i>=start_i;i--) {
4170         j = scantable[i];
4171         level = block[j] * qmat[j];
4172
4173         if(((unsigned)(level+threshold1))>threshold2){
4174             last_non_zero = i;
4175             break;
4176         }else{
4177             block[j]=0;
4178         }
4179     }
4180     for(i=start_i; i<=last_non_zero; i++) {
4181         j = scantable[i];
4182         level = block[j] * qmat[j];
4183
4184 //        if(   bias+level >= (1<<QMAT_SHIFT)
4185 //           || bias-level >= (1<<QMAT_SHIFT)){
4186         if(((unsigned)(level+threshold1))>threshold2){
4187             if(level>0){
4188                 level= (bias + level)>>QMAT_SHIFT;
4189                 block[j]= level;
4190             }else{
4191                 level= (bias - level)>>QMAT_SHIFT;
4192                 block[j]= -level;
4193             }
4194             max |=level;
4195         }else{
4196             block[j]=0;
4197         }
4198     }
4199     *overflow= s->max_qcoeff < max; //overflow might have happened
4200
4201     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4202     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4203         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4204
4205     return last_non_zero;
4206 }
4207
/* Byte offset of member x inside MpegEncContext, for the AVOption tables below. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flags shared by the video encoding options below. The replacement list is
 * parenthesized so it stays one operand next to operators that bind tighter
 * than '|'. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4210 static const AVOption h263_options[] = {
4211     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4212     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4213     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4214     FF_MPV_COMMON_OPTS
4215     { NULL },
4216 };
4217
4218 static const AVClass h263_class = {
4219     .class_name = "H.263 encoder",
4220     .item_name  = av_default_item_name,
4221     .option     = h263_options,
4222     .version    = LIBAVUTIL_VERSION_INT,
4223 };
4224
4225 AVCodec ff_h263_encoder = {
4226     .name           = "h263",
4227     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4228     .type           = AVMEDIA_TYPE_VIDEO,
4229     .id             = AV_CODEC_ID_H263,
4230     .priv_data_size = sizeof(MpegEncContext),
4231     .init           = ff_MPV_encode_init,
4232     .encode2        = ff_MPV_encode_picture,
4233     .close          = ff_MPV_encode_end,
4234     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4235     .priv_class     = &h263_class,
4236 };
4237
4238 static const AVOption h263p_options[] = {
4239     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4240     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4241     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4242     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4243     FF_MPV_COMMON_OPTS
4244     { NULL },
4245 };
4246 static const AVClass h263p_class = {
4247     .class_name = "H.263p encoder",
4248     .item_name  = av_default_item_name,
4249     .option     = h263p_options,
4250     .version    = LIBAVUTIL_VERSION_INT,
4251 };
4252
4253 AVCodec ff_h263p_encoder = {
4254     .name           = "h263p",
4255     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4256     .type           = AVMEDIA_TYPE_VIDEO,
4257     .id             = AV_CODEC_ID_H263P,
4258     .priv_data_size = sizeof(MpegEncContext),
4259     .init           = ff_MPV_encode_init,
4260     .encode2        = ff_MPV_encode_picture,
4261     .close          = ff_MPV_encode_end,
4262     .capabilities   = CODEC_CAP_SLICE_THREADS,
4263     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4264     .priv_class     = &h263p_class,
4265 };
4266
4267 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4268
4269 AVCodec ff_msmpeg4v2_encoder = {
4270     .name           = "msmpeg4v2",
4271     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4272     .type           = AVMEDIA_TYPE_VIDEO,
4273     .id             = AV_CODEC_ID_MSMPEG4V2,
4274     .priv_data_size = sizeof(MpegEncContext),
4275     .init           = ff_MPV_encode_init,
4276     .encode2        = ff_MPV_encode_picture,
4277     .close          = ff_MPV_encode_end,
4278     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4279     .priv_class     = &msmpeg4v2_class,
4280 };
4281
4282 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4283
4284 AVCodec ff_msmpeg4v3_encoder = {
4285     .name           = "msmpeg4",
4286     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4287     .type           = AVMEDIA_TYPE_VIDEO,
4288     .id             = AV_CODEC_ID_MSMPEG4V3,
4289     .priv_data_size = sizeof(MpegEncContext),
4290     .init           = ff_MPV_encode_init,
4291     .encode2        = ff_MPV_encode_picture,
4292     .close          = ff_MPV_encode_end,
4293     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4294     .priv_class     = &msmpeg4v3_class,
4295 };
4296
4297 FF_MPV_GENERIC_CLASS(wmv1)
4298
4299 AVCodec ff_wmv1_encoder = {
4300     .name           = "wmv1",
4301     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4302     .type           = AVMEDIA_TYPE_VIDEO,
4303     .id             = AV_CODEC_ID_WMV1,
4304     .priv_data_size = sizeof(MpegEncContext),
4305     .init           = ff_MPV_encode_init,
4306     .encode2        = ff_MPV_encode_picture,
4307     .close          = ff_MPV_encode_end,
4308     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4309     .priv_class     = &wmv1_class,
4310 };