git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of Libav.
   9  *
  10  * Libav is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * Libav is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with Libav; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include "libavutil/intmath.h"
  31 #include "libavutil/mathematics.h"
  32 #include "libavutil/opt.h"
  33 #include "avcodec.h"
  34 #include "dsputil.h"
  35 #include "mpegvideo.h"
  36 #include "h263.h"
  37 #include "mjpegenc.h"
  38 #include "msmpeg4.h"
  39 #include "faandct.h"
  40 #include "thread.h"
  41 #include "aandcttab.h"
  42 #include "flv.h"
  43 #include "mpeg4video.h"
  44 #include "internal.h"
  45 #include "bytestream.h"
  46 #include <limits.h>
  47
  48 //#undef NDEBUG
  49 //#include <assert.h>
  50
  51 static int encode_picture(MpegEncContext *s, int picture_number);
  52 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
  53 static int sse_mb(MpegEncContext *s);
  54 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
  55 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
  56
  57 /* enable all paranoid tests for rounding, overflows, etc... */
  58 //#define PARANOID
  59
  60 //#define DEBUG
  61
/* Default motion-vector penalty table; MPV_encode_defaults() installs it as
 * s->me.mv_penalty. Indexed [f_code][mv + MAX_MV] judging by its dimensions
 * -- TODO confirm against the users of me.mv_penalty. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
/* Default f_code lookup table; MPV_encode_defaults() sets the entries for
 * offsets -16..15 around MAX_MV to 1 and installs it as s->fcode_tab. */
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
  64
/* Generic option table: the entries expanded from FF_MPV_COMMON_OPTS,
 * followed by the terminating NULL entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
  69
  70 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
  71                        uint16_t (*qmat16)[2][64],
  72                        const uint16_t *quant_matrix,
  73                        int bias, int qmin, int qmax, int intra)
  74 {
  75     int qscale;
  76     int shift = 0;
  77
  78     for (qscale = qmin; qscale <= qmax; qscale++) {
  79         int i;
  80         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
  81             dsp->fdct == ff_jpeg_fdct_islow_10 ||
  82             dsp->fdct == ff_faandct) {
  83             for (i = 0; i < 64; i++) {
  84                 const int j = dsp->idct_permutation[i];
  85                 /* 16 <= qscale * quant_matrix[i] <= 7905
  86                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
  87                  *             19952 <=              x  <= 249205026
  88                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
  89                  *           3444240 >= (1 << 36) / (x) >= 275 */
  90
  91                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
  92                                         (qscale * quant_matrix[j]));
  93             }
  94         } else if (dsp->fdct == ff_fdct_ifast) {
  95             for (i = 0; i < 64; i++) {
  96                 const int j = dsp->idct_permutation[i];
  97                 /* 16 <= qscale * quant_matrix[i] <= 7905
  98                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
  99                  *             19952 <=              x  <= 249205026
 100                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 101                  *           3444240 >= (1 << 36) / (x) >= 275 */
 102
 103                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
 104                                         (ff_aanscales[i] * qscale *
 105                                          quant_matrix[j]));
 106             }
 107         } else {
 108             for (i = 0; i < 64; i++) {
 109                 const int j = dsp->idct_permutation[i];
 110                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 111                  * Assume x = qscale * quant_matrix[i]
 112                  * So             16 <=              x  <= 7905
 113                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 114                  * so          32768 >= (1 << 19) / (x) >= 67 */
 115                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
 116                                         (qscale * quant_matrix[j]));
 117                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 118                 //                    (qscale * quant_matrix[i]);
 119                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
 120                                        (qscale * quant_matrix[j]);
 121
 122                 if (qmat16[qscale][0][i] == 0 ||
 123                     qmat16[qscale][0][i] == 128 * 256)
 124                     qmat16[qscale][0][i] = 128 * 256 - 1;
 125                 qmat16[qscale][1][i] =
 126                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 127                                 qmat16[qscale][0][i]);
 128             }
 129         }
 130
 131         for (i = intra; i < 64; i++) {
 132             int64_t max = 8191;
 133             if (dsp->fdct == ff_fdct_ifast) {
 134                 max = (8191LL * ff_aanscales[i]) >> 14;
 135             }
 136             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 137                 shift++;
 138             }
 139         }
 140     }
 141     if (shift) {
 142         av_log(NULL, AV_LOG_INFO,
 143                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 144                QMAT_SHIFT - shift);
 145     }
 146 }
 147
 148 static inline void update_qscale(MpegEncContext *s)
 149 {
 150     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 151                 (FF_LAMBDA_SHIFT + 7);
 152     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 153
 154     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 155                  FF_LAMBDA_SHIFT;
 156 }
 157
 158 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 159 {
 160     int i;
 161
 162     if (matrix) {
 163         put_bits(pb, 1, 1);
 164         for (i = 0; i < 64; i++) {
 165             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 166         }
 167     } else
 168         put_bits(pb, 1, 0);
 169 }
 170
 171 /**
 172  * init s->current_picture.qscale_table from s->lambda_table
 173  */
 174 void ff_init_qscale_tab(MpegEncContext *s)
 175 {
 176     int8_t * const qscale_table = s->current_picture.f.qscale_table;
 177     int i;
 178
 179     for (i = 0; i < s->mb_num; i++) {
 180         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 181         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 182         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 183                                                   s->avctx->qmax);
 184     }
 185 }
 186
 187 static void copy_picture_attributes(MpegEncContext *s,
 188                                     AVFrame *dst,
 189                                     AVFrame *src)
 190 {
 191     int i;
 192
 193     dst->pict_type              = src->pict_type;
 194     dst->quality                = src->quality;
 195     dst->coded_picture_number   = src->coded_picture_number;
 196     dst->display_picture_number = src->display_picture_number;
 197     //dst->reference              = src->reference;
 198     dst->pts                    = src->pts;
 199     dst->interlaced_frame       = src->interlaced_frame;
 200     dst->top_field_first        = src->top_field_first;
 201
 202     if (s->avctx->me_threshold) {
 203         if (!src->motion_val[0])
 204             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
 205         if (!src->mb_type)
 206             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
 207         if (!src->ref_index[0])
 208             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
 209         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
 210             av_log(s->avctx, AV_LOG_ERROR,
 211                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
 212                    src->motion_subsample_log2, dst->motion_subsample_log2);
 213
 214         memcpy(dst->mb_type, src->mb_type,
 215                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
 216
 217         for (i = 0; i < 2; i++) {
 218             int stride = ((16 * s->mb_width ) >>
 219                           src->motion_subsample_log2) + 1;
 220             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
 221
 222             if (src->motion_val[i] &&
 223                 src->motion_val[i] != dst->motion_val[i]) {
 224                 memcpy(dst->motion_val[i], src->motion_val[i],
 225                        2 * stride * height * sizeof(int16_t));
 226             }
 227             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
 228                 memcpy(dst->ref_index[i], src->ref_index[i],
 229                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
 230             }
 231         }
 232     }
 233 }
 234
 235 static void update_duplicate_context_after_me(MpegEncContext *dst,
 236                                               MpegEncContext *src)
 237 {
 238 #define COPY(a) dst->a= src->a
 239     COPY(pict_type);
 240     COPY(current_picture);
 241     COPY(f_code);
 242     COPY(b_code);
 243     COPY(qscale);
 244     COPY(lambda);
 245     COPY(lambda2);
 246     COPY(picture_in_gop_number);
 247     COPY(gop_picture_number);
 248     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 249     COPY(progressive_frame);    // FIXME don't set in encode_header
 250     COPY(partitioned_frame);    // FIXME don't set in encode_header
 251 #undef COPY
 252 }
 253
 254 /**
 255  * Set the given MpegEncContext to defaults for encoding.
 256  * the changed fields will not depend upon the prior state of the MpegEncContext.
 257  */
 258 static void MPV_encode_defaults(MpegEncContext *s)
 259 {
 260     int i;
 261     ff_MPV_common_defaults(s);
 262
 263     for (i = -16; i < 16; i++) {
 264         default_fcode_tab[i + MAX_MV] = 1;
 265     }
 266     s->me.mv_penalty = default_mv_penalty;
 267     s->fcode_tab     = default_fcode_tab;
 268 }
 269
 270 /* init video encoder */
 271 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
 272 {
 273     MpegEncContext *s = avctx->priv_data;
 274     int i;
 275     int chroma_h_shift, chroma_v_shift;
 276
 277     MPV_encode_defaults(s);
 278
 279     switch (avctx->codec_id) {
 280     case AV_CODEC_ID_MPEG2VIDEO:
 281         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
 282             avctx->pix_fmt != PIX_FMT_YUV422P) {
 283             av_log(avctx, AV_LOG_ERROR,
 284                    "only YUV420 and YUV422 are supported\n");
 285             return -1;
 286         }
 287         break;
 288     case AV_CODEC_ID_LJPEG:
 289         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
 290             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
 291             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
 292             avctx->pix_fmt != PIX_FMT_BGRA     &&
 293             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
 294               avctx->pix_fmt != PIX_FMT_YUV422P &&
 295               avctx->pix_fmt != PIX_FMT_YUV444P) ||
 296              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
 297             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
 298             return -1;
 299         }
 300         break;
 301     case AV_CODEC_ID_MJPEG:
 302         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
 303             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
 304             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
 305               avctx->pix_fmt != PIX_FMT_YUV422P) ||
 306              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
 307             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 308             return -1;
 309         }
 310         break;
 311     default:
 312         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
 313             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 314             return -1;
 315         }
 316     }
 317
 318     switch (avctx->pix_fmt) {
 319     case PIX_FMT_YUVJ422P:
 320     case PIX_FMT_YUV422P:
 321         s->chroma_format = CHROMA_422;
 322         break;
 323     case PIX_FMT_YUVJ420P:
 324     case PIX_FMT_YUV420P:
 325     default:
 326         s->chroma_format = CHROMA_420;
 327         break;
 328     }
 329
 330     s->bit_rate = avctx->bit_rate;
 331     s->width    = avctx->width;
 332     s->height   = avctx->height;
 333     if (avctx->gop_size > 600 &&
 334         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 335         av_log(avctx, AV_LOG_ERROR,
 336                "Warning keyframe interval too large! reducing it ...\n");
 337         avctx->gop_size = 600;
 338     }
 339     s->gop_size     = avctx->gop_size;
 340     s->avctx        = avctx;
 341     s->flags        = avctx->flags;
 342     s->flags2       = avctx->flags2;
 343     s->max_b_frames = avctx->max_b_frames;
 344     s->codec_id     = avctx->codec->id;
 345 #if FF_API_MPV_GLOBAL_OPTS
 346     if (avctx->luma_elim_threshold)
 347         s->luma_elim_threshold   = avctx->luma_elim_threshold;
 348     if (avctx->chroma_elim_threshold)
 349         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
 350 #endif
 351     s->strict_std_compliance = avctx->strict_std_compliance;
 352     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 353     s->mpeg_quant         = avctx->mpeg_quant;
 354     s->rtp_mode           = !!avctx->rtp_payload_size;
 355     s->intra_dc_precision = avctx->intra_dc_precision;
 356     s->user_specified_pts = AV_NOPTS_VALUE;
 357
 358     if (s->gop_size <= 1) {
 359         s->intra_only = 1;
 360         s->gop_size   = 12;
 361     } else {
 362         s->intra_only = 0;
 363     }
 364
 365     s->me_method = avctx->me_method;
 366
 367     /* Fixed QSCALE */
 368     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 369
 370 #if FF_API_MPV_GLOBAL_OPTS
 371     if (s->flags & CODEC_FLAG_QP_RD)
 372         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
 373 #endif
 374
 375     s->adaptive_quant = (s->avctx->lumi_masking ||
 376                          s->avctx->dark_masking ||
 377                          s->avctx->temporal_cplx_masking ||
 378                          s->avctx->spatial_cplx_masking  ||
 379                          s->avctx->p_masking      ||
 380                          s->avctx->border_masking ||
 381                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 382                         !s->fixed_qscale;
 383
 384     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
 385
 386     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 387         av_log(avctx, AV_LOG_ERROR,
 388                "a vbv buffer size is needed, "
 389                "for encoding with a maximum bitrate\n");
 390         return -1;
 391     }
 392
 393     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 394         av_log(avctx, AV_LOG_INFO,
 395                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 396     }
 397
 398     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 399         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 400         return -1;
 401     }
 402
 403     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 404         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
 405         return -1;
 406     }
 407
 408     if (avctx->rc_max_rate &&
 409         avctx->rc_max_rate == avctx->bit_rate &&
 410         avctx->rc_max_rate != avctx->rc_min_rate) {
 411         av_log(avctx, AV_LOG_INFO,
 412                "impossible bitrate constraints, this will fail\n");
 413     }
 414
 415     if (avctx->rc_buffer_size &&
 416         avctx->bit_rate * (int64_t)avctx->time_base.num >
 417             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 418         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 419         return -1;
 420     }
 421
 422     if (!s->fixed_qscale &&
 423         avctx->bit_rate * av_q2d(avctx->time_base) >
 424             avctx->bit_rate_tolerance) {
 425         av_log(avctx, AV_LOG_ERROR,
 426                "bitrate tolerance too small for bitrate\n");
 427         return -1;
 428     }
 429
 430     if (s->avctx->rc_max_rate &&
 431         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 432         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 433          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 434         90000LL * (avctx->rc_buffer_size - 1) >
 435             s->avctx->rc_max_rate * 0xFFFFLL) {
 436         av_log(avctx, AV_LOG_INFO,
 437                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 438                "specified vbv buffer is too large for the given bitrate!\n");
 439     }
 440
 441     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
 442         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 443         s->codec_id != AV_CODEC_ID_FLV1) {
 444         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 445         return -1;
 446     }
 447
 448     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 449         av_log(avctx, AV_LOG_ERROR,
 450                "OBMC is only supported with simple mb decision\n");
 451         return -1;
 452     }
 453
 454     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 455         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 456         return -1;
 457     }
 458
 459     if (s->max_b_frames                    &&
 460         s->codec_id != AV_CODEC_ID_MPEG4      &&
 461         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 462         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 463         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 464         return -1;
 465     }
 466
 467     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 468          s->codec_id == AV_CODEC_ID_H263  ||
 469          s->codec_id == AV_CODEC_ID_H263P) &&
 470         (avctx->sample_aspect_ratio.num > 255 ||
 471          avctx->sample_aspect_ratio.den > 255)) {
 472         av_log(avctx, AV_LOG_ERROR,
 473                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
 474                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 475         return -1;
 476     }
 477
 478     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 479         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 480         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 481         return -1;
 482     }
 483
 484     // FIXME mpeg2 uses that too
 485     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
 486         av_log(avctx, AV_LOG_ERROR,
 487                "mpeg2 style quantization not supported by codec\n");
 488         return -1;
 489     }
 490
 491 #if FF_API_MPV_GLOBAL_OPTS
 492     if (s->flags & CODEC_FLAG_CBP_RD)
 493         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
 494 #endif
 495
 496     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 497         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 498         return -1;
 499     }
 500
 501     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 502         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 503         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 504         return -1;
 505     }
 506
 507     if (s->avctx->scenechange_threshold < 1000000000 &&
 508         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
 509         av_log(avctx, AV_LOG_ERROR,
 510                "closed gop with scene change detection are not supported yet, "
 511                "set threshold to 1000000000\n");
 512         return -1;
 513     }
 514
 515     if (s->flags & CODEC_FLAG_LOW_DELAY) {
 516         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 517             av_log(avctx, AV_LOG_ERROR,
 518                   "low delay forcing is only available for mpeg2\n");
 519             return -1;
 520         }
 521         if (s->max_b_frames != 0) {
 522             av_log(avctx, AV_LOG_ERROR,
 523                    "b frames cannot be used with low delay\n");
 524             return -1;
 525         }
 526     }
 527
 528     if (s->q_scale_type == 1) {
 529         if (avctx->qmax > 12) {
 530             av_log(avctx, AV_LOG_ERROR,
 531                    "non linear quant only supports qmax <= 12 currently\n");
 532             return -1;
 533         }
 534     }
 535
 536     if (s->avctx->thread_count > 1         &&
 537         s->codec_id != AV_CODEC_ID_MPEG4      &&
 538         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 539         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 540         (s->codec_id != AV_CODEC_ID_H263P)) {
 541         av_log(avctx, AV_LOG_ERROR,
 542                "multi threaded encoding not supported by codec\n");
 543         return -1;
 544     }
 545
 546     if (s->avctx->thread_count < 1) {
 547         av_log(avctx, AV_LOG_ERROR,
 548                "automatic thread number detection not supported by codec,"
 549                "patch welcome\n");
 550         return -1;
 551     }
 552
 553     if (s->avctx->thread_count > 1)
 554         s->rtp_mode = 1;
 555
 556     if (!avctx->time_base.den || !avctx->time_base.num) {
 557         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 558         return -1;
 559     }
 560
 561     i = (INT_MAX / 2 + 128) >> 8;
 562     if (avctx->me_threshold >= i) {
 563         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
 564                i - 1);
 565         return -1;
 566     }
 567     if (avctx->mb_threshold >= i) {
 568         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
 569                i - 1);
 570         return -1;
 571     }
 572
 573     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 574         av_log(avctx, AV_LOG_INFO,
 575                "notice: b_frame_strategy only affects the first pass\n");
 576         avctx->b_frame_strategy = 0;
 577     }
 578
 579     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 580     if (i > 1) {
 581         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 582         avctx->time_base.den /= i;
 583         avctx->time_base.num /= i;
 584         //return -1;
 585     }
 586
 587     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 588         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
 589         // (a + x * 3 / 8) / x
 590         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 591         s->inter_quant_bias = 0;
 592     } else {
 593         s->intra_quant_bias = 0;
 594         // (a - x / 4) / x
 595         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 596     }
 597
 598     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 599         s->intra_quant_bias = avctx->intra_quant_bias;
 600     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 601         s->inter_quant_bias = avctx->inter_quant_bias;
 602
 603     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
 604                                   &chroma_v_shift);
 605
 606     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 607         s->avctx->time_base.den > (1 << 16) - 1) {
 608         av_log(avctx, AV_LOG_ERROR,
 609                "timebase %d/%d not supported by MPEG 4 standard, "
 610                "the maximum admitted value for the timebase denominator "
 611                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 612                (1 << 16) - 1);
 613         return -1;
 614     }
 615     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 616
 617 #if FF_API_MPV_GLOBAL_OPTS
 618     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
 619         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
 620     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
 621         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
 622     if (avctx->quantizer_noise_shaping)
 623         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
 624 #endif
 625
 626     switch (avctx->codec->id) {
 627     case AV_CODEC_ID_MPEG1VIDEO:
 628         s->out_format = FMT_MPEG1;
 629         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 630         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 631         break;
 632     case AV_CODEC_ID_MPEG2VIDEO:
 633         s->out_format = FMT_MPEG1;
 634         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 635         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 636         s->rtp_mode   = 1;
 637         break;
 638     case AV_CODEC_ID_LJPEG:
 639     case AV_CODEC_ID_MJPEG:
 640         s->out_format = FMT_MJPEG;
 641         s->intra_only = 1; /* force intra only for jpeg */
 642         if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
 643             avctx->pix_fmt   == PIX_FMT_BGRA) {
 644             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
 645             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
 646             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
 647         } else {
 648             s->mjpeg_vsample[0] = 2;
 649             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
 650             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
 651             s->mjpeg_hsample[0] = 2;
 652             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
 653             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
 654         }
 655         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
 656             ff_mjpeg_encode_init(s) < 0)
 657             return -1;
 658         avctx->delay = 0;
 659         s->low_delay = 1;
 660         break;
 661     case AV_CODEC_ID_H261:
 662         if (!CONFIG_H261_ENCODER)
 663             return -1;
 664         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 665             av_log(avctx, AV_LOG_ERROR,
 666                    "The specified picture size of %dx%d is not valid for the "
 667                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 668                     s->width, s->height);
 669             return -1;
 670         }
 671         s->out_format = FMT_H261;
 672         avctx->delay  = 0;
 673         s->low_delay  = 1;
 674         break;
 675     case AV_CODEC_ID_H263:
 676         if (!CONFIG_H263_ENCODER)
 677         return -1;
 678         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 679                              s->width, s->height) == 8) {
 680             av_log(avctx, AV_LOG_INFO,
 681                    "The specified picture size of %dx%d is not valid for "
 682                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 683                    "352x288, 704x576, and 1408x1152."
 684                    "Try H.263+.\n", s->width, s->height);
 685             return -1;
 686         }
 687         s->out_format = FMT_H263;
 688         avctx->delay  = 0;
 689         s->low_delay  = 1;
 690         break;
 691     case AV_CODEC_ID_H263P:
 692         s->out_format = FMT_H263;
 693         s->h263_plus  = 1;
 694         /* Fx */
 695         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 696         s->modified_quant  = s->h263_aic;
 697         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 698         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 699
 700         /* /Fx */
 701         /* These are just to be sure */
 702         avctx->delay = 0;
 703         s->low_delay = 1;
 704         break;
 705     case AV_CODEC_ID_FLV1:
 706         s->out_format      = FMT_H263;
 707         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 708         s->unrestricted_mv = 1;
 709         s->rtp_mode  = 0; /* don't allow GOB */
 710         avctx->delay = 0;
 711         s->low_delay = 1;
 712         break;
 713     case AV_CODEC_ID_RV10:
 714         s->out_format = FMT_H263;
 715         avctx->delay  = 0;
 716         s->low_delay  = 1;
 717         break;
 718     case AV_CODEC_ID_RV20:
 719         s->out_format      = FMT_H263;
 720         avctx->delay       = 0;
 721         s->low_delay       = 1;
 722         s->modified_quant  = 1;
 723         s->h263_aic        = 1;
 724         s->h263_plus       = 1;
 725         s->loop_filter     = 1;
 726         s->unrestricted_mv = 0;
 727         break;
 728     case AV_CODEC_ID_MPEG4:
 729         s->out_format      = FMT_H263;
 730         s->h263_pred       = 1;
 731         s->unrestricted_mv = 1;
 732         s->low_delay       = s->max_b_frames ? 0 : 1;
 733         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 734         break;
 735     case AV_CODEC_ID_MSMPEG4V2:
 736         s->out_format      = FMT_H263;
 737         s->h263_pred       = 1;
 738         s->unrestricted_mv = 1;
 739         s->msmpeg4_version = 2;
 740         avctx->delay       = 0;
 741         s->low_delay       = 1;
 742         break;
 743     case AV_CODEC_ID_MSMPEG4V3:
 744         s->out_format        = FMT_H263;
 745         s->h263_pred         = 1;
 746         s->unrestricted_mv   = 1;
 747         s->msmpeg4_version   = 3;
 748         s->flipflop_rounding = 1;
 749         avctx->delay         = 0;
 750         s->low_delay         = 1;
 751         break;
 752     case AV_CODEC_ID_WMV1:
 753         s->out_format        = FMT_H263;
 754         s->h263_pred         = 1;
 755         s->unrestricted_mv   = 1;
 756         s->msmpeg4_version   = 4;
 757         s->flipflop_rounding = 1;
 758         avctx->delay         = 0;
 759         s->low_delay         = 1;
 760         break;
 761     case AV_CODEC_ID_WMV2:
 762         s->out_format        = FMT_H263;
 763         s->h263_pred         = 1;
 764         s->unrestricted_mv   = 1;
 765         s->msmpeg4_version   = 5;
 766         s->flipflop_rounding = 1;
 767         avctx->delay         = 0;
 768         s->low_delay         = 1;
 769         break;
 770     default:
 771         return -1;
 772     }
 773
 774     avctx->has_b_frames = !s->low_delay;
 775
 776     s->encoding = 1;
 777
 778     s->progressive_frame    =
 779     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 780                                                 CODEC_FLAG_INTERLACED_ME) ||
 781                                 s->alternate_scan);
 782
 783     /* init */
 784     if (ff_MPV_common_init(s) < 0)
 785         return -1;
 786
 787     if (ARCH_X86)
 788         ff_MPV_encode_init_x86(s);
 789
 790     if (!s->dct_quantize)
 791         s->dct_quantize = ff_dct_quantize_c;
 792     if (!s->denoise_dct)
 793         s->denoise_dct  = denoise_dct_c;
 794     s->fast_dct_quantize = s->dct_quantize;
 795     if (avctx->trellis)
 796         s->dct_quantize  = dct_quantize_trellis_c;
 797
 798     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 799         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 800
 801     s->quant_precision = 5;
 802
 803     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
 804     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
 805
 806     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 807         ff_h261_encode_init(s);
 808     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 809         ff_h263_encode_init(s);
 810     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 811         ff_msmpeg4_encode_init(s);
 812     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 813         && s->out_format == FMT_MPEG1)
 814         ff_mpeg1_encode_init(s);
 815
 816     /* init q matrix */
 817     for (i = 0; i < 64; i++) {
 818         int j = s->dsp.idct_permutation[i];
 819         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 820             s->mpeg_quant) {
 821             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 822             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 823         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 824             s->intra_matrix[j] =
 825             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 826         } else {
 827             /* mpeg1/2 */
 828             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 829             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 830         }
 831         if (s->avctx->intra_matrix)
 832             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 833         if (s->avctx->inter_matrix)
 834             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 835     }
 836
 837     /* precompute matrix */
 838     /* for mjpeg, we do include qscale in the matrix */
 839     if (s->out_format != FMT_MJPEG) {
 840         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
 841                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 842                           31, 1);
 843         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
 844                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 845                           31, 0);
 846     }
 847
 848     if (ff_rate_control_init(s) < 0)
 849         return -1;
 850
 851     return 0;
 852 }
 853
 854 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
 855 {
 856     MpegEncContext *s = avctx->priv_data;
 857
 858     ff_rate_control_uninit(s);
 859
 860     ff_MPV_common_end(s);
 861     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
 862         s->out_format == FMT_MJPEG)
 863         ff_mjpeg_encode_close(s);
 864
 865     av_freep(&avctx->extradata);
 866
 867     return 0;
 868 }
 869
 870 static int get_sae(uint8_t *src, int ref, int stride)
 871 {
 872     int x,y;
 873     int acc = 0;
 874
 875     for (y = 0; y < 16; y++) {
 876         for (x = 0; x < 16; x++) {
 877             acc += FFABS(src[x + y * stride] - ref);
 878         }
 879     }
 880
 881     return acc;
 882 }
 883
 884 static int get_intra_count(MpegEncContext *s, uint8_t *src,
 885                            uint8_t *ref, int stride)
 886 {
 887     int x, y, w, h;
 888     int acc = 0;
 889
 890     w = s->width  & ~15;
 891     h = s->height & ~15;
 892
 893     for (y = 0; y < h; y += 16) {
 894         for (x = 0; x < w; x += 16) {
 895             int offset = x + y * stride;
 896             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
 897                                      16);
 898             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
 899             int sae  = get_sae(src + offset, mean, stride);
 900
 901             acc += sae + 500 < sad;
 902         }
 903     }
 904     return acc;
 905 }
 906
 907
 908 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
 909 {
 910     AVFrame *pic = NULL;
 911     int64_t pts;
 912     int i;
 913     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
 914                                                  (s->low_delay ? 0 : 1);
 915     int direct = 1;
 916
 917     if (pic_arg) {
 918         pts = pic_arg->pts;
 919         pic_arg->display_picture_number = s->input_picture_number++;
 920
 921         if (pts != AV_NOPTS_VALUE) {
 922             if (s->user_specified_pts != AV_NOPTS_VALUE) {
 923                 int64_t time = pts;
 924                 int64_t last = s->user_specified_pts;
 925
 926                 if (time <= last) {
 927                     av_log(s->avctx, AV_LOG_ERROR,
 928                            "Error, Invalid timestamp=%"PRId64", "
 929                            "last=%"PRId64"\n", pts, s->user_specified_pts);
 930                     return -1;
 931                 }
 932
 933                 if (!s->low_delay && pic_arg->display_picture_number == 1)
 934                     s->dts_delta = time - last;
 935             }
 936             s->user_specified_pts = pts;
 937         } else {
 938             if (s->user_specified_pts != AV_NOPTS_VALUE) {
 939                 s->user_specified_pts =
 940                 pts = s->user_specified_pts + 1;
 941                 av_log(s->avctx, AV_LOG_INFO,
 942                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
 943                        pts);
 944             } else {
 945                 pts = pic_arg->display_picture_number;
 946             }
 947         }
 948     }
 949
 950   if (pic_arg) {
 951     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
 952         direct = 0;
 953     if (pic_arg->linesize[0] != s->linesize)
 954         direct = 0;
 955     if (pic_arg->linesize[1] != s->uvlinesize)
 956         direct = 0;
 957     if (pic_arg->linesize[2] != s->uvlinesize)
 958         direct = 0;
 959
 960     av_dlog(s->avctx, "%d %d %d %d\n", pic_arg->linesize[0],
 961             pic_arg->linesize[1], s->linesize, s->uvlinesize);
 962
 963     if (direct) {
 964         i = ff_find_unused_picture(s, 1);
 965         if (i < 0)
 966             return i;
 967
 968         pic = &s->picture[i].f;
 969         pic->reference = 3;
 970
 971         for (i = 0; i < 4; i++) {
 972             pic->data[i]     = pic_arg->data[i];
 973             pic->linesize[i] = pic_arg->linesize[i];
 974         }
 975         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
 976             return -1;
 977         }
 978     } else {
 979         i = ff_find_unused_picture(s, 0);
 980         if (i < 0)
 981             return i;
 982
 983         pic = &s->picture[i].f;
 984         pic->reference = 3;
 985
 986         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
 987             return -1;
 988         }
 989
 990         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
 991             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
 992             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
 993             // empty
 994         } else {
 995             int h_chroma_shift, v_chroma_shift;
 996             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
 997                                           &v_chroma_shift);
 998
 999             for (i = 0; i < 3; i++) {
1000                 int src_stride = pic_arg->linesize[i];
1001                 int dst_stride = i ? s->uvlinesize : s->linesize;
1002                 int h_shift = i ? h_chroma_shift : 0;
1003                 int v_shift = i ? v_chroma_shift : 0;
1004                 int w = s->width  >> h_shift;
1005                 int h = s->height >> v_shift;
1006                 uint8_t *src = pic_arg->data[i];
1007                 uint8_t *dst = pic->data[i];
1008
1009                 if (!s->avctx->rc_buffer_size)
1010                     dst += INPLACE_OFFSET;
1011
1012                 if (src_stride == dst_stride)
1013                     memcpy(dst, src, src_stride * h);
1014                 else {
1015                     while (h--) {
1016                         memcpy(dst, src, w);
1017                         dst += dst_stride;
1018                         src += src_stride;
1019                     }
1020                 }
1021             }
1022         }
1023     }
1024     copy_picture_attributes(s, pic, pic_arg);
1025     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1026   }
1027
1028     /* shift buffer entries */
1029     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1030         s->input_picture[i - 1] = s->input_picture[i];
1031
1032     s->input_picture[encoding_delay] = (Picture*) pic;
1033
1034     return 0;
1035 }
1036
1037 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1038 {
1039     int x, y, plane;
1040     int score = 0;
1041     int64_t score64 = 0;
1042
1043     for (plane = 0; plane < 3; plane++) {
1044         const int stride = p->f.linesize[plane];
1045         const int bw = plane ? 1 : 2;
1046         for (y = 0; y < s->mb_height * bw; y++) {
1047             for (x = 0; x < s->mb_width * bw; x++) {
1048                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1049                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1050                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1051                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1052
1053                 switch (s->avctx->frame_skip_exp) {
1054                 case 0: score    =  FFMAX(score, v);          break;
1055                 case 1: score   += FFABS(v);                  break;
1056                 case 2: score   += v * v;                     break;
1057                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1058                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1059                 }
1060             }
1061         }
1062     }
1063
1064     if (score)
1065         score64 = score;
1066
1067     if (score64 < s->avctx->frame_skip_threshold)
1068         return 1;
1069     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1070         return 1;
1071     return 0;
1072 }
1073
1074 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1075 {
1076     AVPacket pkt = { 0 };
1077     int ret, got_output;
1078
1079     av_init_packet(&pkt);
1080     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1081     if (ret < 0)
1082         return ret;
1083
1084     ret = pkt.size;
1085     av_free_packet(&pkt);
1086     return ret;
1087 }
1088
1089 static int estimate_best_b_count(MpegEncContext *s)
1090 {
1091     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1092     AVCodecContext *c = avcodec_alloc_context3(NULL);
1093     AVFrame input[FF_MAX_B_FRAMES + 2];
1094     const int scale = s->avctx->brd_scale;
1095     int i, j, out_size, p_lambda, b_lambda, lambda2;
1096     int64_t best_rd  = INT64_MAX;
1097     int best_b_count = -1;
1098
1099     assert(scale >= 0 && scale <= 3);
1100
1101     //emms_c();
1102     //s->next_picture_ptr->quality;
1103     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1104     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1105     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1106     if (!b_lambda) // FIXME we should do this somewhere else
1107         b_lambda = p_lambda;
1108     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1109                FF_LAMBDA_SHIFT;
1110
1111     c->width        = s->width  >> scale;
1112     c->height       = s->height >> scale;
1113     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1114                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1115     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1116     c->mb_decision  = s->avctx->mb_decision;
1117     c->me_cmp       = s->avctx->me_cmp;
1118     c->mb_cmp       = s->avctx->mb_cmp;
1119     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1120     c->pix_fmt      = PIX_FMT_YUV420P;
1121     c->time_base    = s->avctx->time_base;
1122     c->max_b_frames = s->max_b_frames;
1123
1124     if (avcodec_open2(c, codec, NULL) < 0)
1125         return -1;
1126
1127     for (i = 0; i < s->max_b_frames + 2; i++) {
1128         int ysize = c->width * c->height;
1129         int csize = (c->width / 2) * (c->height / 2);
1130         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1131                                                 s->next_picture_ptr;
1132
1133         avcodec_get_frame_defaults(&input[i]);
1134         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1135         input[i].data[1]     = input[i].data[0] + ysize;
1136         input[i].data[2]     = input[i].data[1] + csize;
1137         input[i].linesize[0] = c->width;
1138         input[i].linesize[1] =
1139         input[i].linesize[2] = c->width / 2;
1140
1141         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1142             pre_input = *pre_input_ptr;
1143
1144             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1145                 pre_input.f.data[0] += INPLACE_OFFSET;
1146                 pre_input.f.data[1] += INPLACE_OFFSET;
1147                 pre_input.f.data[2] += INPLACE_OFFSET;
1148             }
1149
1150             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1151                                  pre_input.f.data[0], pre_input.f.linesize[0],
1152                                  c->width,      c->height);
1153             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1154                                  pre_input.f.data[1], pre_input.f.linesize[1],
1155                                  c->width >> 1, c->height >> 1);
1156             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1157                                  pre_input.f.data[2], pre_input.f.linesize[2],
1158                                  c->width >> 1, c->height >> 1);
1159         }
1160     }
1161
1162     for (j = 0; j < s->max_b_frames + 1; j++) {
1163         int64_t rd = 0;
1164
1165         if (!s->input_picture[j])
1166             break;
1167
1168         c->error[0] = c->error[1] = c->error[2] = 0;
1169
1170         input[0].pict_type = AV_PICTURE_TYPE_I;
1171         input[0].quality   = 1 * FF_QP2LAMBDA;
1172
1173         out_size = encode_frame(c, &input[0]);
1174
1175         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1176
1177         for (i = 0; i < s->max_b_frames + 1; i++) {
1178             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1179
1180             input[i + 1].pict_type = is_p ?
1181                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1182             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1183
1184             out_size = encode_frame(c, &input[i + 1]);
1185
1186             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1187         }
1188
1189         /* get the delayed frames */
1190         while (out_size) {
1191             out_size = encode_frame(c, NULL);
1192             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1193         }
1194
1195         rd += c->error[0] + c->error[1] + c->error[2];
1196
1197         if (rd < best_rd) {
1198             best_rd = rd;
1199             best_b_count = j;
1200         }
1201     }
1202
1203     avcodec_close(c);
1204     av_freep(&c);
1205
1206     for (i = 0; i < s->max_b_frames + 2; i++) {
1207         av_freep(&input[i].data[0]);
1208     }
1209
1210     return best_b_count;
1211 }
1212
1213 static int select_input_picture(MpegEncContext *s)
1214 {
1215     int i;
1216
1217     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1218         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1219     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1220
1221     /* set next picture type & ordering */
1222     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1223         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1224             s->next_picture_ptr == NULL || s->intra_only) {
1225             s->reordered_input_picture[0] = s->input_picture[0];
1226             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1227             s->reordered_input_picture[0]->f.coded_picture_number =
1228                 s->coded_picture_number++;
1229         } else {
1230             int b_frames;
1231
1232             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1233                 if (s->picture_in_gop_number < s->gop_size &&
1234                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1235                     // FIXME check that te gop check above is +-1 correct
1236                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1237                         for (i = 0; i < 4; i++)
1238                             s->input_picture[0]->f.data[i] = NULL;
1239                         s->input_picture[0]->f.type = 0;
1240                     } else {
1241                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1242                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1243
1244                         s->avctx->release_buffer(s->avctx,
1245                                                  &s->input_picture[0]->f);
1246                     }
1247
1248                     emms_c();
1249                     ff_vbv_update(s, 0);
1250
1251                     goto no_output_pic;
1252                 }
1253             }
1254
1255             if (s->flags & CODEC_FLAG_PASS2) {
1256                 for (i = 0; i < s->max_b_frames + 1; i++) {
1257                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1258
1259                     if (pict_num >= s->rc_context.num_entries)
1260                         break;
1261                     if (!s->input_picture[i]) {
1262                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1263                         break;
1264                     }
1265
1266                     s->input_picture[i]->f.pict_type =
1267                         s->rc_context.entry[pict_num].new_pict_type;
1268                 }
1269             }
1270
1271             if (s->avctx->b_frame_strategy == 0) {
1272                 b_frames = s->max_b_frames;
1273                 while (b_frames && !s->input_picture[b_frames])
1274                     b_frames--;
1275             } else if (s->avctx->b_frame_strategy == 1) {
1276                 for (i = 1; i < s->max_b_frames + 1; i++) {
1277                     if (s->input_picture[i] &&
1278                         s->input_picture[i]->b_frame_score == 0) {
1279                         s->input_picture[i]->b_frame_score =
1280                             get_intra_count(s,
1281                                             s->input_picture[i    ]->f.data[0],
1282                                             s->input_picture[i - 1]->f.data[0],
1283                                             s->linesize) + 1;
1284                     }
1285                 }
1286                 for (i = 0; i < s->max_b_frames + 1; i++) {
1287                     if (s->input_picture[i] == NULL ||
1288                         s->input_picture[i]->b_frame_score - 1 >
1289                             s->mb_num / s->avctx->b_sensitivity)
1290                         break;
1291                 }
1292
1293                 b_frames = FFMAX(0, i - 1);
1294
1295                 /* reset scores */
1296                 for (i = 0; i < b_frames + 1; i++) {
1297                     s->input_picture[i]->b_frame_score = 0;
1298                 }
1299             } else if (s->avctx->b_frame_strategy == 2) {
1300                 b_frames = estimate_best_b_count(s);
1301             } else {
1302                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1303                 b_frames = 0;
1304             }
1305
1306             emms_c();
1307
1308             for (i = b_frames - 1; i >= 0; i--) {
1309                 int type = s->input_picture[i]->f.pict_type;
1310                 if (type && type != AV_PICTURE_TYPE_B)
1311                     b_frames = i;
1312             }
1313             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1314                 b_frames == s->max_b_frames) {
1315                 av_log(s->avctx, AV_LOG_ERROR,
1316                        "warning, too many b frames in a row\n");
1317             }
1318
1319             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1320                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1321                     s->gop_size > s->picture_in_gop_number) {
1322                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1323                 } else {
1324                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1325                         b_frames = 0;
1326                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1327                 }
1328             }
1329
1330             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1331                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1332                 b_frames--;
1333
1334             s->reordered_input_picture[0] = s->input_picture[b_frames];
1335             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1336                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1337             s->reordered_input_picture[0]->f.coded_picture_number =
1338                 s->coded_picture_number++;
1339             for (i = 0; i < b_frames; i++) {
1340                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1341                 s->reordered_input_picture[i + 1]->f.pict_type =
1342                     AV_PICTURE_TYPE_B;
1343                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1344                     s->coded_picture_number++;
1345             }
1346         }
1347     }
1348 no_output_pic:
1349     if (s->reordered_input_picture[0]) {
1350         s->reordered_input_picture[0]->f.reference =
1351            s->reordered_input_picture[0]->f.pict_type !=
1352                AV_PICTURE_TYPE_B ? 3 : 0;
1353
1354         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1355
1356         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1357             s->avctx->rc_buffer_size) {
1358             // input is a shared pix, so we can't modifiy it -> alloc a new
1359             // one & ensure that the shared one is reuseable
1360
1361             Picture *pic;
1362             int i = ff_find_unused_picture(s, 0);
1363             if (i < 0)
1364                 return i;
1365             pic = &s->picture[i];
1366
1367             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1368             if (ff_alloc_picture(s, pic, 0) < 0) {
1369                 return -1;
1370             }
1371
1372             /* mark us unused / free shared pic */
1373             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1374                 s->avctx->release_buffer(s->avctx,
1375                                          &s->reordered_input_picture[0]->f);
1376             for (i = 0; i < 4; i++)
1377                 s->reordered_input_picture[0]->f.data[i] = NULL;
1378             s->reordered_input_picture[0]->f.type = 0;
1379
1380             copy_picture_attributes(s, &pic->f,
1381                                     &s->reordered_input_picture[0]->f);
1382
1383             s->current_picture_ptr = pic;
1384         } else {
1385             // input is not a shared pix -> reuse buffer for current_pix
1386
1387             assert(s->reordered_input_picture[0]->f.type ==
1388                        FF_BUFFER_TYPE_USER ||
1389                    s->reordered_input_picture[0]->f.type ==
1390                        FF_BUFFER_TYPE_INTERNAL);
1391
1392             s->current_picture_ptr = s->reordered_input_picture[0];
1393             for (i = 0; i < 4; i++) {
1394                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1395             }
1396         }
1397         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1398
1399         s->picture_number = s->new_picture.f.display_picture_number;
1400     } else {
1401         memset(&s->new_picture, 0, sizeof(Picture));
1402     }
1403     return 0;
1404 }
1405
1406 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1407                           const AVFrame *pic_arg, int *got_packet)
1408 {
1409     MpegEncContext *s = avctx->priv_data;
1410     int i, stuffing_count, ret;
1411     int context_count = s->slice_context_count;
1412
1413     s->picture_in_gop_number++;
1414
1415     if (load_input_picture(s, pic_arg) < 0)
1416         return -1;
1417
1418     if (select_input_picture(s) < 0) {
1419         return -1;
1420     }
1421
1422     /* output? */
1423     if (s->new_picture.f.data[0]) {
1424         if (!pkt->data &&
1425             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1426             return ret;
1427         if (s->mb_info) {
1428             s->mb_info_ptr = av_packet_new_side_data(pkt,
1429                                  AV_PKT_DATA_H263_MB_INFO,
1430                                  s->mb_width*s->mb_height*12);
1431             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1432         }
1433
1434         for (i = 0; i < context_count; i++) {
1435             int start_y = s->thread_context[i]->start_mb_y;
1436             int   end_y = s->thread_context[i]->  end_mb_y;
1437             int h       = s->mb_height;
1438             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1439             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1440
1441             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1442         }
1443
1444         s->pict_type = s->new_picture.f.pict_type;
1445         //emms_c();
1446         ff_MPV_frame_start(s, avctx);
1447 vbv_retry:
1448         if (encode_picture(s, s->picture_number) < 0)
1449             return -1;
1450
1451         avctx->header_bits = s->header_bits;
1452         avctx->mv_bits     = s->mv_bits;
1453         avctx->misc_bits   = s->misc_bits;
1454         avctx->i_tex_bits  = s->i_tex_bits;
1455         avctx->p_tex_bits  = s->p_tex_bits;
1456         avctx->i_count     = s->i_count;
1457         // FIXME f/b_count in avctx
1458         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1459         avctx->skip_count  = s->skip_count;
1460
1461         ff_MPV_frame_end(s);
1462
1463         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1464             ff_mjpeg_encode_picture_trailer(s);
1465
1466         if (avctx->rc_buffer_size) {
1467             RateControlContext *rcc = &s->rc_context;
1468             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1469
1470             if (put_bits_count(&s->pb) > max_size &&
1471                 s->lambda < s->avctx->lmax) {
1472                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1473                                        (s->qscale + 1) / s->qscale);
1474                 if (s->adaptive_quant) {
1475                     int i;
1476                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1477                         s->lambda_table[i] =
1478                             FFMAX(s->lambda_table[i] + 1,
1479                                   s->lambda_table[i] * (s->qscale + 1) /
1480                                   s->qscale);
1481                 }
1482                 s->mb_skipped = 0;        // done in MPV_frame_start()
1483                 // done in encode_picture() so we must undo it
1484                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1485                     if (s->flipflop_rounding          ||
1486                         s->codec_id == AV_CODEC_ID_H263P ||
1487                         s->codec_id == AV_CODEC_ID_MPEG4)
1488                         s->no_rounding ^= 1;
1489                 }
1490                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1491                     s->time_base       = s->last_time_base;
1492                     s->last_non_b_time = s->time - s->pp_time;
1493                 }
1494                 for (i = 0; i < context_count; i++) {
1495                     PutBitContext *pb = &s->thread_context[i]->pb;
1496                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1497                 }
1498                 goto vbv_retry;
1499             }
1500
1501             assert(s->avctx->rc_max_rate);
1502         }
1503
1504         if (s->flags & CODEC_FLAG_PASS1)
1505             ff_write_pass1_stats(s);
1506
1507         for (i = 0; i < 4; i++) {
1508             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1509             avctx->error[i] += s->current_picture_ptr->f.error[i];
1510         }
1511
1512         if (s->flags & CODEC_FLAG_PASS1)
1513             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1514                    avctx->i_tex_bits + avctx->p_tex_bits ==
1515                        put_bits_count(&s->pb));
1516         flush_put_bits(&s->pb);
1517         s->frame_bits  = put_bits_count(&s->pb);
1518
1519         stuffing_count = ff_vbv_update(s, s->frame_bits);
1520         if (stuffing_count) {
1521             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1522                     stuffing_count + 50) {
1523                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1524                 return -1;
1525             }
1526
1527             switch (s->codec_id) {
1528             case AV_CODEC_ID_MPEG1VIDEO:
1529             case AV_CODEC_ID_MPEG2VIDEO:
1530                 while (stuffing_count--) {
1531                     put_bits(&s->pb, 8, 0);
1532                 }
1533             break;
1534             case AV_CODEC_ID_MPEG4:
1535                 put_bits(&s->pb, 16, 0);
1536                 put_bits(&s->pb, 16, 0x1C3);
1537                 stuffing_count -= 4;
1538                 while (stuffing_count--) {
1539                     put_bits(&s->pb, 8, 0xFF);
1540                 }
1541             break;
1542             default:
1543                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1544             }
1545             flush_put_bits(&s->pb);
1546             s->frame_bits  = put_bits_count(&s->pb);
1547         }
1548
1549         /* update mpeg1/2 vbv_delay for CBR */
1550         if (s->avctx->rc_max_rate                          &&
1551             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1552             s->out_format == FMT_MPEG1                     &&
1553             90000LL * (avctx->rc_buffer_size - 1) <=
1554                 s->avctx->rc_max_rate * 0xFFFFLL) {
1555             int vbv_delay, min_delay;
1556             double inbits  = s->avctx->rc_max_rate *
1557                              av_q2d(s->avctx->time_base);
1558             int    minbits = s->frame_bits - 8 *
1559                              (s->vbv_delay_ptr - s->pb.buf - 1);
1560             double bits    = s->rc_context.buffer_index + minbits - inbits;
1561
1562             if (bits < 0)
1563                 av_log(s->avctx, AV_LOG_ERROR,
1564                        "Internal error, negative bits\n");
1565
1566             assert(s->repeat_first_field == 0);
1567
1568             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1569             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1570                         s->avctx->rc_max_rate;
1571
1572             vbv_delay = FFMAX(vbv_delay, min_delay);
1573
1574             assert(vbv_delay < 0xFFFF);
1575
1576             s->vbv_delay_ptr[0] &= 0xF8;
1577             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1578             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1579             s->vbv_delay_ptr[2] &= 0x07;
1580             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1581             avctx->vbv_delay     = vbv_delay * 300;
1582         }
1583         s->total_bits     += s->frame_bits;
1584         avctx->frame_bits  = s->frame_bits;
1585
1586         pkt->pts = s->current_picture.f.pts;
1587         if (!s->low_delay) {
1588             if (!s->current_picture.f.coded_picture_number)
1589                 pkt->dts = pkt->pts - s->dts_delta;
1590             else
1591                 pkt->dts = s->reordered_pts;
1592             s->reordered_pts = s->input_picture[0]->f.pts;
1593         } else
1594             pkt->dts = pkt->pts;
1595         if (s->current_picture.f.key_frame)
1596             pkt->flags |= AV_PKT_FLAG_KEY;
1597         if (s->mb_info)
1598             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1599     } else {
1600         s->frame_bits = 0;
1601     }
1602     assert((s->frame_bits & 7) == 0);
1603
1604     pkt->size = s->frame_bits / 8;
1605     *got_packet = !!pkt->size;
1606     return 0;
1607 }
1608
1609 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1610                                                 int n, int threshold)
1611 {
1612     static const char tab[64] = {
1613         3, 2, 2, 1, 1, 1, 1, 1,
1614         1, 1, 1, 1, 1, 1, 1, 1,
1615         1, 1, 1, 1, 1, 1, 1, 1,
1616         0, 0, 0, 0, 0, 0, 0, 0,
1617         0, 0, 0, 0, 0, 0, 0, 0,
1618         0, 0, 0, 0, 0, 0, 0, 0,
1619         0, 0, 0, 0, 0, 0, 0, 0,
1620         0, 0, 0, 0, 0, 0, 0, 0
1621     };
1622     int score = 0;
1623     int run = 0;
1624     int i;
1625     DCTELEM *block = s->block[n];
1626     const int last_index = s->block_last_index[n];
1627     int skip_dc;
1628
1629     if (threshold < 0) {
1630         skip_dc = 0;
1631         threshold = -threshold;
1632     } else
1633         skip_dc = 1;
1634
1635     /* Are all we could set to zero already zero? */
1636     if (last_index <= skip_dc - 1)
1637         return;
1638
1639     for (i = 0; i <= last_index; i++) {
1640         const int j = s->intra_scantable.permutated[i];
1641         const int level = FFABS(block[j]);
1642         if (level == 1) {
1643             if (skip_dc && i == 0)
1644                 continue;
1645             score += tab[run];
1646             run = 0;
1647         } else if (level > 1) {
1648             return;
1649         } else {
1650             run++;
1651         }
1652     }
1653     if (score >= threshold)
1654         return;
1655     for (i = skip_dc; i <= last_index; i++) {
1656         const int j = s->intra_scantable.permutated[i];
1657         block[j] = 0;
1658     }
1659     if (block[0])
1660         s->block_last_index[n] = 0;
1661     else
1662         s->block_last_index[n] = -1;
1663 }
1664
1665 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1666                                int last_index)
1667 {
1668     int i;
1669     const int maxlevel = s->max_qcoeff;
1670     const int minlevel = s->min_qcoeff;
1671     int overflow = 0;
1672
1673     if (s->mb_intra) {
1674         i = 1; // skip clipping of intra dc
1675     } else
1676         i = 0;
1677
1678     for (; i <= last_index; i++) {
1679         const int j = s->intra_scantable.permutated[i];
1680         int level = block[j];
1681
1682         if (level > maxlevel) {
1683             level = maxlevel;
1684             overflow++;
1685         } else if (level < minlevel) {
1686             level = minlevel;
1687             overflow++;
1688         }
1689
1690         block[j] = level;
1691     }
1692
1693     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1694         av_log(s->avctx, AV_LOG_INFO,
1695                "warning, clipping %d dct coefficients to %d..%d\n",
1696                overflow, minlevel, maxlevel);
1697 }
1698
/**
 * Fill an 8x8 weight table from the local pixel activity of an 8x8 block.
 *
 * For every pixel the neighbourhood of up to 3x3 pixels (cropped at the
 * block border) is examined and the weight is set to
 * 36 * sqrt(count * sum_of_squares - sum^2) / count.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two pixel rows
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total    = 0;
            int total_sq = 0;
            int n        = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int pix = line[nx];

                    total    += pix;
                    total_sq += pix * pix;
                    n++;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(n * total_sq - total * total)) / n;
        }
    }
}
1722
1723 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1724                                                 int motion_x, int motion_y,
1725                                                 int mb_block_height,
1726                                                 int mb_block_count)
1727 {
1728     int16_t weight[8][64];
1729     DCTELEM orig[8][64];
1730     const int mb_x = s->mb_x;
1731     const int mb_y = s->mb_y;
1732     int i;
1733     int skip_dct[8];
1734     int dct_offset = s->linesize * 8; // default for progressive frames
1735     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1736     int wrap_y, wrap_c;
1737
1738     for (i = 0; i < mb_block_count; i++)
1739         skip_dct[i] = s->skipdct;
1740
1741     if (s->adaptive_quant) {
1742         const int last_qp = s->qscale;
1743         const int mb_xy = mb_x + mb_y * s->mb_stride;
1744
1745         s->lambda = s->lambda_table[mb_xy];
1746         update_qscale(s);
1747
1748         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1749             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1750             s->dquant = s->qscale - last_qp;
1751
1752             if (s->out_format == FMT_H263) {
1753                 s->dquant = av_clip(s->dquant, -2, 2);
1754
1755                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1756                     if (!s->mb_intra) {
1757                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1758                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1759                                 s->dquant = 0;
1760                         }
1761                         if (s->mv_type == MV_TYPE_8X8)
1762                             s->dquant = 0;
1763                     }
1764                 }
1765             }
1766         }
1767         ff_set_qscale(s, last_qp + s->dquant);
1768     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1769         ff_set_qscale(s, s->qscale + s->dquant);
1770
1771     wrap_y = s->linesize;
1772     wrap_c = s->uvlinesize;
1773     ptr_y  = s->new_picture.f.data[0] +
1774              (mb_y * 16 * wrap_y)              + mb_x * 16;
1775     ptr_cb = s->new_picture.f.data[1] +
1776              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1777     ptr_cr = s->new_picture.f.data[2] +
1778              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1779
1780     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1781         uint8_t *ebuf = s->edge_emu_buffer + 32;
1782         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1783                                 mb_y * 16, s->width, s->height);
1784         ptr_y = ebuf;
1785         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1786                                 mb_block_height, mb_x * 8, mb_y * 8,
1787                                 s->width >> 1, s->height >> 1);
1788         ptr_cb = ebuf + 18 * wrap_y;
1789         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1790                                 mb_block_height, mb_x * 8, mb_y * 8,
1791                                 s->width >> 1, s->height >> 1);
1792         ptr_cr = ebuf + 18 * wrap_y + 8;
1793     }
1794
1795     if (s->mb_intra) {
1796         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1797             int progressive_score, interlaced_score;
1798
1799             s->interlaced_dct = 0;
1800             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1801                                                     NULL, wrap_y, 8) +
1802                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1803                                                     NULL, wrap_y, 8) - 400;
1804
1805             if (progressive_score > 0) {
1806                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1807                                                        NULL, wrap_y * 2, 8) +
1808                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1809                                                        NULL, wrap_y * 2, 8);
1810                 if (progressive_score > interlaced_score) {
1811                     s->interlaced_dct = 1;
1812
1813                     dct_offset = wrap_y;
1814                     wrap_y <<= 1;
1815                     if (s->chroma_format == CHROMA_422)
1816                         wrap_c <<= 1;
1817                 }
1818             }
1819         }
1820
1821         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1822         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1823         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1824         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1825
1826         if (s->flags & CODEC_FLAG_GRAY) {
1827             skip_dct[4] = 1;
1828             skip_dct[5] = 1;
1829         } else {
1830             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1831             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1832             if (!s->chroma_y_shift) { /* 422 */
1833                 s->dsp.get_pixels(s->block[6],
1834                                   ptr_cb + (dct_offset >> 1), wrap_c);
1835                 s->dsp.get_pixels(s->block[7],
1836                                   ptr_cr + (dct_offset >> 1), wrap_c);
1837             }
1838         }
1839     } else {
1840         op_pixels_func (*op_pix)[4];
1841         qpel_mc_func (*op_qpix)[16];
1842         uint8_t *dest_y, *dest_cb, *dest_cr;
1843
1844         dest_y  = s->dest[0];
1845         dest_cb = s->dest[1];
1846         dest_cr = s->dest[2];
1847
1848         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1849             op_pix  = s->dsp.put_pixels_tab;
1850             op_qpix = s->dsp.put_qpel_pixels_tab;
1851         } else {
1852             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1853             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1854         }
1855
1856         if (s->mv_dir & MV_DIR_FORWARD) {
1857             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1858                           s->last_picture.f.data,
1859                           op_pix, op_qpix);
1860             op_pix  = s->dsp.avg_pixels_tab;
1861             op_qpix = s->dsp.avg_qpel_pixels_tab;
1862         }
1863         if (s->mv_dir & MV_DIR_BACKWARD) {
1864             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1865                           s->next_picture.f.data,
1866                           op_pix, op_qpix);
1867         }
1868
1869         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1870             int progressive_score, interlaced_score;
1871
1872             s->interlaced_dct = 0;
1873             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1874                                                     ptr_y,              wrap_y,
1875                                                     8) +
1876                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1877                                                     ptr_y + wrap_y * 8, wrap_y,
1878                                                     8) - 400;
1879
1880             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1881                 progressive_score -= 400;
1882
1883             if (progressive_score > 0) {
1884                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1885                                                        ptr_y,
1886                                                        wrap_y * 2, 8) +
1887                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1888                                                        ptr_y + wrap_y,
1889                                                        wrap_y * 2, 8);
1890
1891                 if (progressive_score > interlaced_score) {
1892                     s->interlaced_dct = 1;
1893
1894                     dct_offset = wrap_y;
1895                     wrap_y <<= 1;
1896                     if (s->chroma_format == CHROMA_422)
1897                         wrap_c <<= 1;
1898                 }
1899             }
1900         }
1901
1902         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1903         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1904         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1905                            dest_y + dct_offset, wrap_y);
1906         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1907                            dest_y + dct_offset + 8, wrap_y);
1908
1909         if (s->flags & CODEC_FLAG_GRAY) {
1910             skip_dct[4] = 1;
1911             skip_dct[5] = 1;
1912         } else {
1913             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1914             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1915             if (!s->chroma_y_shift) { /* 422 */
1916                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1917                                    dest_cb + (dct_offset >> 1), wrap_c);
1918                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1919                                    dest_cr + (dct_offset >> 1), wrap_c);
1920             }
1921         }
1922         /* pre quantization */
1923         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1924                 2 * s->qscale * s->qscale) {
1925             // FIXME optimize
1926             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1927                               wrap_y, 8) < 20 * s->qscale)
1928                 skip_dct[0] = 1;
1929             if (s->dsp.sad[1](NULL, ptr_y + 8,
1930                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1931                 skip_dct[1] = 1;
1932             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1933                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1934                 skip_dct[2] = 1;
1935             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1936                               dest_y + dct_offset + 8,
1937                               wrap_y, 8) < 20 * s->qscale)
1938                 skip_dct[3] = 1;
1939             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1940                               wrap_c, 8) < 20 * s->qscale)
1941                 skip_dct[4] = 1;
1942             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1943                               wrap_c, 8) < 20 * s->qscale)
1944                 skip_dct[5] = 1;
1945             if (!s->chroma_y_shift) { /* 422 */
1946                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1947                                   dest_cb + (dct_offset >> 1),
1948                                   wrap_c, 8) < 20 * s->qscale)
1949                     skip_dct[6] = 1;
1950                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1951                                   dest_cr + (dct_offset >> 1),
1952                                   wrap_c, 8) < 20 * s->qscale)
1953                     skip_dct[7] = 1;
1954             }
1955         }
1956     }
1957
1958     if (s->quantizer_noise_shaping) {
1959         if (!skip_dct[0])
1960             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1961         if (!skip_dct[1])
1962             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1963         if (!skip_dct[2])
1964             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1965         if (!skip_dct[3])
1966             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1967         if (!skip_dct[4])
1968             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1969         if (!skip_dct[5])
1970             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1971         if (!s->chroma_y_shift) { /* 422 */
1972             if (!skip_dct[6])
1973                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1974                                   wrap_c);
1975             if (!skip_dct[7])
1976                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1977                                   wrap_c);
1978         }
1979         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
1980     }
1981
1982     /* DCT & quantize */
1983     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1984     {
1985         for (i = 0; i < mb_block_count; i++) {
1986             if (!skip_dct[i]) {
1987                 int overflow;
1988                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1989                 // FIXME we could decide to change to quantizer instead of
1990                 // clipping
1991                 // JS: I don't think that would be a good idea it could lower
1992                 //     quality instead of improve it. Just INTRADC clipping
1993                 //     deserves changes in quantizer
1994                 if (overflow)
1995                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
1996             } else
1997                 s->block_last_index[i] = -1;
1998         }
1999         if (s->quantizer_noise_shaping) {
2000             for (i = 0; i < mb_block_count; i++) {
2001                 if (!skip_dct[i]) {
2002                     s->block_last_index[i] =
2003                         dct_quantize_refine(s, s->block[i], weight[i],
2004                                             orig[i], i, s->qscale);
2005                 }
2006             }
2007         }
2008
2009         if (s->luma_elim_threshold && !s->mb_intra)
2010             for (i = 0; i < 4; i++)
2011                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2012         if (s->chroma_elim_threshold && !s->mb_intra)
2013             for (i = 4; i < mb_block_count; i++)
2014                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2015
2016         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2017             for (i = 0; i < mb_block_count; i++) {
2018                 if (s->block_last_index[i] == -1)
2019                     s->coded_score[i] = INT_MAX / 256;
2020             }
2021         }
2022     }
2023
2024     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2025         s->block_last_index[4] =
2026         s->block_last_index[5] = 0;
2027         s->block[4][0] =
2028         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2029     }
2030
2031     // non c quantize code returns incorrect block_last_index FIXME
2032     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2033         for (i = 0; i < mb_block_count; i++) {
2034             int j;
2035             if (s->block_last_index[i] > 0) {
2036                 for (j = 63; j > 0; j--) {
2037                     if (s->block[i][s->intra_scantable.permutated[j]])
2038                         break;
2039                 }
2040                 s->block_last_index[i] = j;
2041             }
2042         }
2043     }
2044
2045     /* huffman encode */
2046     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2047     case AV_CODEC_ID_MPEG1VIDEO:
2048     case AV_CODEC_ID_MPEG2VIDEO:
2049         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2050             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2051         break;
2052     case AV_CODEC_ID_MPEG4:
2053         if (CONFIG_MPEG4_ENCODER)
2054             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2055         break;
2056     case AV_CODEC_ID_MSMPEG4V2:
2057     case AV_CODEC_ID_MSMPEG4V3:
2058     case AV_CODEC_ID_WMV1:
2059         if (CONFIG_MSMPEG4_ENCODER)
2060             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2061         break;
2062     case AV_CODEC_ID_WMV2:
2063         if (CONFIG_WMV2_ENCODER)
2064             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2065         break;
2066     case AV_CODEC_ID_H261:
2067         if (CONFIG_H261_ENCODER)
2068             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2069         break;
2070     case AV_CODEC_ID_H263:
2071     case AV_CODEC_ID_H263P:
2072     case AV_CODEC_ID_FLV1:
2073     case AV_CODEC_ID_RV10:
2074     case AV_CODEC_ID_RV20:
2075         if (CONFIG_H263_ENCODER)
2076             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2077         break;
2078     case AV_CODEC_ID_MJPEG:
2079         if (CONFIG_MJPEG_ENCODER)
2080             ff_mjpeg_encode_mb(s, s->block);
2081         break;
2082     default:
2083         assert(0);
2084     }
2085 }
2086
2087 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2088 {
2089     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2090     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2091 }
2092
2093 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2094     int i;
2095
2096     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2097
2098     /* mpeg1 */
2099     d->mb_skip_run= s->mb_skip_run;
2100     for(i=0; i<3; i++)
2101         d->last_dc[i] = s->last_dc[i];
2102
2103     /* statistics */
2104     d->mv_bits= s->mv_bits;
2105     d->i_tex_bits= s->i_tex_bits;
2106     d->p_tex_bits= s->p_tex_bits;
2107     d->i_count= s->i_count;
2108     d->f_count= s->f_count;
2109     d->b_count= s->b_count;
2110     d->skip_count= s->skip_count;
2111     d->misc_bits= s->misc_bits;
2112     d->last_bits= 0;
2113
2114     d->mb_skipped= 0;
2115     d->qscale= s->qscale;
2116     d->dquant= s->dquant;
2117
2118     d->esc3_level_length= s->esc3_level_length;
2119 }
2120
2121 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2122     int i;
2123
2124     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2125     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2126
2127     /* mpeg1 */
2128     d->mb_skip_run= s->mb_skip_run;
2129     for(i=0; i<3; i++)
2130         d->last_dc[i] = s->last_dc[i];
2131
2132     /* statistics */
2133     d->mv_bits= s->mv_bits;
2134     d->i_tex_bits= s->i_tex_bits;
2135     d->p_tex_bits= s->p_tex_bits;
2136     d->i_count= s->i_count;
2137     d->f_count= s->f_count;
2138     d->b_count= s->b_count;
2139     d->skip_count= s->skip_count;
2140     d->misc_bits= s->misc_bits;
2141
2142     d->mb_intra= s->mb_intra;
2143     d->mb_skipped= s->mb_skipped;
2144     d->mv_type= s->mv_type;
2145     d->mv_dir= s->mv_dir;
2146     d->pb= s->pb;
2147     if(s->data_partitioning){
2148         d->pb2= s->pb2;
2149         d->tex_pb= s->tex_pb;
2150     }
2151     d->block= s->block;
2152     for(i=0; i<8; i++)
2153         d->block_last_index[i]= s->block_last_index[i];
2154     d->interlaced_dct= s->interlaced_dct;
2155     d->qscale= s->qscale;
2156
2157     d->esc3_level_length= s->esc3_level_length;
2158 }
2159
2160 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2161                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2162                            int *dmin, int *next_block, int motion_x, int motion_y)
2163 {
2164     int score;
2165     uint8_t *dest_backup[3];
2166
2167     copy_context_before_encode(s, backup, type);
2168
2169     s->block= s->blocks[*next_block];
2170     s->pb= pb[*next_block];
2171     if(s->data_partitioning){
2172         s->pb2   = pb2   [*next_block];
2173         s->tex_pb= tex_pb[*next_block];
2174     }
2175
2176     if(*next_block){
2177         memcpy(dest_backup, s->dest, sizeof(s->dest));
2178         s->dest[0] = s->rd_scratchpad;
2179         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2180         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2181         assert(s->linesize >= 32); //FIXME
2182     }
2183
2184     encode_mb(s, motion_x, motion_y);
2185
2186     score= put_bits_count(&s->pb);
2187     if(s->data_partitioning){
2188         score+= put_bits_count(&s->pb2);
2189         score+= put_bits_count(&s->tex_pb);
2190     }
2191
2192     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2193         ff_MPV_decode_mb(s, s->block);
2194
2195         score *= s->lambda2;
2196         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2197     }
2198
2199     if(*next_block){
2200         memcpy(s->dest, dest_backup, sizeof(s->dest));
2201     }
2202
2203     if(score<*dmin){
2204         *dmin= score;
2205         *next_block^=1;
2206
2207         copy_context_after_encode(best, s, type);
2208     }
2209 }
2210
2211 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2212     uint32_t *sq = ff_squareTbl + 256;
2213     int acc=0;
2214     int x,y;
2215
2216     if(w==16 && h==16)
2217         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2218     else if(w==8 && h==8)
2219         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2220
2221     for(y=0; y<h; y++){
2222         for(x=0; x<w; x++){
2223             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2224         }
2225     }
2226
2227     assert(acc>=0);
2228
2229     return acc;
2230 }
2231
2232 static int sse_mb(MpegEncContext *s){
2233     int w= 16;
2234     int h= 16;
2235
2236     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2237     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2238
2239     if(w==16 && h==16)
2240       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2241         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2242                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2243                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2244       }else{
2245         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2246                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2247                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2248       }
2249     else
2250         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2251                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2252                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2253 }
2254
2255 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2256     MpegEncContext *s= *(void**)arg;
2257
2258
2259     s->me.pre_pass=1;
2260     s->me.dia_size= s->avctx->pre_dia_size;
2261     s->first_slice_line=1;
2262     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2263         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2264             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2265         }
2266         s->first_slice_line=0;
2267     }
2268
2269     s->me.pre_pass=0;
2270
2271     return 0;
2272 }
2273
2274 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2275     MpegEncContext *s= *(void**)arg;
2276
2277     ff_check_alignment();
2278
2279     s->me.dia_size= s->avctx->dia_size;
2280     s->first_slice_line=1;
2281     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2282         s->mb_x=0; //for block init below
2283         ff_init_block_index(s);
2284         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2285             s->block_index[0]+=2;
2286             s->block_index[1]+=2;
2287             s->block_index[2]+=2;
2288             s->block_index[3]+=2;
2289
2290             /* compute motion vector & mb_type and store in context */
2291             if(s->pict_type==AV_PICTURE_TYPE_B)
2292                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2293             else
2294                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2295         }
2296         s->first_slice_line=0;
2297     }
2298     return 0;
2299 }
2300
2301 static int mb_var_thread(AVCodecContext *c, void *arg){
2302     MpegEncContext *s= *(void**)arg;
2303     int mb_x, mb_y;
2304
2305     ff_check_alignment();
2306
2307     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2308         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2309             int xx = mb_x * 16;
2310             int yy = mb_y * 16;
2311             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2312             int varc;
2313             int sum = s->dsp.pix_sum(pix, s->linesize);
2314
2315             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2316
2317             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2318             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2319             s->me.mb_var_sum_temp    += varc;
2320         }
2321     }
2322     return 0;
2323 }
2324
2325 static void write_slice_end(MpegEncContext *s){
2326     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2327         if(s->partitioned_frame){
2328             ff_mpeg4_merge_partitions(s);
2329         }
2330
2331         ff_mpeg4_stuffing(&s->pb);
2332     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2333         ff_mjpeg_encode_stuffing(&s->pb);
2334     }
2335
2336     avpriv_align_put_bits(&s->pb);
2337     flush_put_bits(&s->pb);
2338
2339     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2340         s->misc_bits+= get_bits_diff(s);
2341 }
2342
2343 static void write_mb_info(MpegEncContext *s)
2344 {
2345     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2346     int offset = put_bits_count(&s->pb);
2347     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2348     int gobn = s->mb_y / s->gob_index;
2349     int pred_x, pred_y;
2350     if (CONFIG_H263_ENCODER)
2351         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2352     bytestream_put_le32(&ptr, offset);
2353     bytestream_put_byte(&ptr, s->qscale);
2354     bytestream_put_byte(&ptr, gobn);
2355     bytestream_put_le16(&ptr, mba);
2356     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2357     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2358     /* 4MV not implemented */
2359     bytestream_put_byte(&ptr, 0); /* hmv2 */
2360     bytestream_put_byte(&ptr, 0); /* vmv2 */
2361 }
2362
2363 static void update_mb_info(MpegEncContext *s, int startcode)
2364 {
2365     if (!s->mb_info)
2366         return;
2367     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2368         s->mb_info_size += 12;
2369         s->prev_mb_info = s->last_mb_info;
2370     }
2371     if (startcode) {
2372         s->prev_mb_info = put_bits_count(&s->pb)/8;
2373         /* This might have incremented mb_info_size above, and we return without
2374          * actually writing any info into that slot yet. But in that case,
2375          * this will be called again at the start of the after writing the
2376          * start code, actually writing the mb info. */
2377         return;
2378     }
2379
2380     s->last_mb_info = put_bits_count(&s->pb)/8;
2381     if (!s->mb_info_size)
2382         s->mb_info_size += 12;
2383     write_mb_info(s);
2384 }
2385
2386 static int encode_thread(AVCodecContext *c, void *arg){
2387     MpegEncContext *s= *(void**)arg;
2388     int mb_x, mb_y, pdif = 0;
2389     int chr_h= 16>>s->chroma_y_shift;
2390     int i, j;
2391     MpegEncContext best_s, backup_s;
2392     uint8_t bit_buf[2][MAX_MB_BYTES];
2393     uint8_t bit_buf2[2][MAX_MB_BYTES];
2394     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2395     PutBitContext pb[2], pb2[2], tex_pb[2];
2396
2397     ff_check_alignment();
2398
2399     for(i=0; i<2; i++){
2400         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2401         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2402         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2403     }
2404
2405     s->last_bits= put_bits_count(&s->pb);
2406     s->mv_bits=0;
2407     s->misc_bits=0;
2408     s->i_tex_bits=0;
2409     s->p_tex_bits=0;
2410     s->i_count=0;
2411     s->f_count=0;
2412     s->b_count=0;
2413     s->skip_count=0;
2414
2415     for(i=0; i<3; i++){
2416         /* init last dc values */
2417         /* note: quant matrix value (8) is implied here */
2418         s->last_dc[i] = 128 << s->intra_dc_precision;
2419
2420         s->current_picture.f.error[i] = 0;
2421     }
2422     s->mb_skip_run = 0;
2423     memset(s->last_mv, 0, sizeof(s->last_mv));
2424
2425     s->last_mv_dir = 0;
2426
2427     switch(s->codec_id){
2428     case AV_CODEC_ID_H263:
2429     case AV_CODEC_ID_H263P:
2430     case AV_CODEC_ID_FLV1:
2431         if (CONFIG_H263_ENCODER)
2432             s->gob_index = ff_h263_get_gob_height(s);
2433         break;
2434     case AV_CODEC_ID_MPEG4:
2435         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2436             ff_mpeg4_init_partitions(s);
2437         break;
2438     }
2439
2440     s->resync_mb_x=0;
2441     s->resync_mb_y=0;
2442     s->first_slice_line = 1;
2443     s->ptr_lastgob = s->pb.buf;
2444     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2445         s->mb_x=0;
2446         s->mb_y= mb_y;
2447
2448         ff_set_qscale(s, s->qscale);
2449         ff_init_block_index(s);
2450
2451         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2452             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2453             int mb_type= s->mb_type[xy];
2454 //            int d;
2455             int dmin= INT_MAX;
2456             int dir;
2457
2458             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2459                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2460                 return -1;
2461             }
2462             if(s->data_partitioning){
2463                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2464                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2465                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2466                     return -1;
2467                 }
2468             }
2469
2470             s->mb_x = mb_x;
2471             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2472             ff_update_block_index(s);
2473
2474             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2475                 ff_h261_reorder_mb_index(s);
2476                 xy= s->mb_y*s->mb_stride + s->mb_x;
2477                 mb_type= s->mb_type[xy];
2478             }
2479
2480             /* write gob / video packet header  */
2481             if(s->rtp_mode){
2482                 int current_packet_size, is_gob_start;
2483
2484                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2485
2486                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2487
2488                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2489
2490                 switch(s->codec_id){
2491                 case AV_CODEC_ID_H263:
2492                 case AV_CODEC_ID_H263P:
2493                     if(!s->h263_slice_structured)
2494                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2495                     break;
2496                 case AV_CODEC_ID_MPEG2VIDEO:
2497                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2498                 case AV_CODEC_ID_MPEG1VIDEO:
2499                     if(s->mb_skip_run) is_gob_start=0;
2500                     break;
2501                 }
2502
2503                 if(is_gob_start){
2504                     if(s->start_mb_y != mb_y || mb_x!=0){
2505                         write_slice_end(s);
2506
2507                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2508                             ff_mpeg4_init_partitions(s);
2509                         }
2510                     }
2511
2512                     assert((put_bits_count(&s->pb)&7) == 0);
2513                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2514
2515                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2516                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2517                         int d= 100 / s->avctx->error_rate;
2518                         if(r % d == 0){
2519                             current_packet_size=0;
2520                             s->pb.buf_ptr= s->ptr_lastgob;
2521                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2522                         }
2523                     }
2524
2525                     if (s->avctx->rtp_callback){
2526                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2527                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2528                     }
2529                     update_mb_info(s, 1);
2530
2531                     switch(s->codec_id){
2532                     case AV_CODEC_ID_MPEG4:
2533                         if (CONFIG_MPEG4_ENCODER) {
2534                             ff_mpeg4_encode_video_packet_header(s);
2535                             ff_mpeg4_clean_buffers(s);
2536                         }
2537                     break;
2538                     case AV_CODEC_ID_MPEG1VIDEO:
2539                     case AV_CODEC_ID_MPEG2VIDEO:
2540                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2541                             ff_mpeg1_encode_slice_header(s);
2542                             ff_mpeg1_clean_buffers(s);
2543                         }
2544                     break;
2545                     case AV_CODEC_ID_H263:
2546                     case AV_CODEC_ID_H263P:
2547                         if (CONFIG_H263_ENCODER)
2548                             ff_h263_encode_gob_header(s, mb_y);
2549                     break;
2550                     }
2551
2552                     if(s->flags&CODEC_FLAG_PASS1){
2553                         int bits= put_bits_count(&s->pb);
2554                         s->misc_bits+= bits - s->last_bits;
2555                         s->last_bits= bits;
2556                     }
2557
2558                     s->ptr_lastgob += current_packet_size;
2559                     s->first_slice_line=1;
2560                     s->resync_mb_x=mb_x;
2561                     s->resync_mb_y=mb_y;
2562                 }
2563             }
2564
2565             if(  (s->resync_mb_x   == s->mb_x)
2566                && s->resync_mb_y+1 == s->mb_y){
2567                 s->first_slice_line=0;
2568             }
2569
2570             s->mb_skipped=0;
2571             s->dquant=0; //only for QP_RD
2572
2573             update_mb_info(s, 0);
2574
2575             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2576                 int next_block=0;
2577                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2578
2579                 copy_context_before_encode(&backup_s, s, -1);
2580                 backup_s.pb= s->pb;
2581                 best_s.data_partitioning= s->data_partitioning;
2582                 best_s.partitioned_frame= s->partitioned_frame;
2583                 if(s->data_partitioning){
2584                     backup_s.pb2= s->pb2;
2585                     backup_s.tex_pb= s->tex_pb;
2586                 }
2587
2588                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2589                     s->mv_dir = MV_DIR_FORWARD;
2590                     s->mv_type = MV_TYPE_16X16;
2591                     s->mb_intra= 0;
2592                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2593                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2594                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2595                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2596                 }
2597                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2598                     s->mv_dir = MV_DIR_FORWARD;
2599                     s->mv_type = MV_TYPE_FIELD;
2600                     s->mb_intra= 0;
2601                     for(i=0; i<2; i++){
2602                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2603                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2604                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2605                     }
2606                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2607                                  &dmin, &next_block, 0, 0);
2608                 }
2609                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2610                     s->mv_dir = MV_DIR_FORWARD;
2611                     s->mv_type = MV_TYPE_16X16;
2612                     s->mb_intra= 0;
2613                     s->mv[0][0][0] = 0;
2614                     s->mv[0][0][1] = 0;
2615                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2616                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2617                 }
2618                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2619                     s->mv_dir = MV_DIR_FORWARD;
2620                     s->mv_type = MV_TYPE_8X8;
2621                     s->mb_intra= 0;
2622                     for(i=0; i<4; i++){
2623                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2624                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2625                     }
2626                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2627                                  &dmin, &next_block, 0, 0);
2628                 }
2629                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2630                     s->mv_dir = MV_DIR_FORWARD;
2631                     s->mv_type = MV_TYPE_16X16;
2632                     s->mb_intra= 0;
2633                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2634                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2635                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2636                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2637                 }
2638                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2639                     s->mv_dir = MV_DIR_BACKWARD;
2640                     s->mv_type = MV_TYPE_16X16;
2641                     s->mb_intra= 0;
2642                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2643                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2644                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2645                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2646                 }
2647                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2648                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2649                     s->mv_type = MV_TYPE_16X16;
2650                     s->mb_intra= 0;
2651                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2652                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2653                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2654                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2655                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2656                                  &dmin, &next_block, 0, 0);
2657                 }
2658                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2659                     s->mv_dir = MV_DIR_FORWARD;
2660                     s->mv_type = MV_TYPE_FIELD;
2661                     s->mb_intra= 0;
2662                     for(i=0; i<2; i++){
2663                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2664                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2665                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2666                     }
2667                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2668                                  &dmin, &next_block, 0, 0);
2669                 }
2670                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2671                     s->mv_dir = MV_DIR_BACKWARD;
2672                     s->mv_type = MV_TYPE_FIELD;
2673                     s->mb_intra= 0;
2674                     for(i=0; i<2; i++){
2675                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2676                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2677                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2678                     }
2679                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2680                                  &dmin, &next_block, 0, 0);
2681                 }
2682                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2683                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2684                     s->mv_type = MV_TYPE_FIELD;
2685                     s->mb_intra= 0;
2686                     for(dir=0; dir<2; dir++){
2687                         for(i=0; i<2; i++){
2688                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2689                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2690                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2691                         }
2692                     }
2693                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2694                                  &dmin, &next_block, 0, 0);
2695                 }
2696                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2697                     s->mv_dir = 0;
2698                     s->mv_type = MV_TYPE_16X16;
2699                     s->mb_intra= 1;
2700                     s->mv[0][0][0] = 0;
2701                     s->mv[0][0][1] = 0;
2702                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2703                                  &dmin, &next_block, 0, 0);
2704                     if(s->h263_pred || s->h263_aic){
2705                         if(best_s.mb_intra)
2706                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2707                         else
2708                             ff_clean_intra_table_entries(s); //old mode?
2709                     }
2710                 }
2711
2712                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2713                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2714                         const int last_qp= backup_s.qscale;
2715                         int qpi, qp, dc[6];
2716                         DCTELEM ac[6][16];
2717                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2718                         static const int dquant_tab[4]={-1,1,-2,2};
2719
2720                         assert(backup_s.dquant == 0);
2721
2722                         //FIXME intra
2723                         s->mv_dir= best_s.mv_dir;
2724                         s->mv_type = MV_TYPE_16X16;
2725                         s->mb_intra= best_s.mb_intra;
2726                         s->mv[0][0][0] = best_s.mv[0][0][0];
2727                         s->mv[0][0][1] = best_s.mv[0][0][1];
2728                         s->mv[1][0][0] = best_s.mv[1][0][0];
2729                         s->mv[1][0][1] = best_s.mv[1][0][1];
2730
2731                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2732                         for(; qpi<4; qpi++){
2733                             int dquant= dquant_tab[qpi];
2734                             qp= last_qp + dquant;
2735                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2736                                 continue;
2737                             backup_s.dquant= dquant;
2738                             if(s->mb_intra && s->dc_val[0]){
2739                                 for(i=0; i<6; i++){
2740                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2741                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2742                                 }
2743                             }
2744
2745                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2746                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2747                             if(best_s.qscale != qp){
2748                                 if(s->mb_intra && s->dc_val[0]){
2749                                     for(i=0; i<6; i++){
2750                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2751                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2752                                     }
2753                                 }
2754                             }
2755                         }
2756                     }
2757                 }
2758                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2759                     int mx= s->b_direct_mv_table[xy][0];
2760                     int my= s->b_direct_mv_table[xy][1];
2761
2762                     backup_s.dquant = 0;
2763                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2764                     s->mb_intra= 0;
2765                     ff_mpeg4_set_direct_mv(s, mx, my);
2766                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2767                                  &dmin, &next_block, mx, my);
2768                 }
2769                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2770                     backup_s.dquant = 0;
2771                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2772                     s->mb_intra= 0;
2773                     ff_mpeg4_set_direct_mv(s, 0, 0);
2774                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2775                                  &dmin, &next_block, 0, 0);
2776                 }
2777                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2778                     int coded=0;
2779                     for(i=0; i<6; i++)
2780                         coded |= s->block_last_index[i];
2781                     if(coded){
2782                         int mx,my;
2783                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2784                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2785                             mx=my=0; //FIXME find the one we actually used
2786                             ff_mpeg4_set_direct_mv(s, mx, my);
2787                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2788                             mx= s->mv[1][0][0];
2789                             my= s->mv[1][0][1];
2790                         }else{
2791                             mx= s->mv[0][0][0];
2792                             my= s->mv[0][0][1];
2793                         }
2794
2795                         s->mv_dir= best_s.mv_dir;
2796                         s->mv_type = best_s.mv_type;
2797                         s->mb_intra= 0;
2798 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2799                         s->mv[0][0][1] = best_s.mv[0][0][1];
2800                         s->mv[1][0][0] = best_s.mv[1][0][0];
2801                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2802                         backup_s.dquant= 0;
2803                         s->skipdct=1;
2804                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2805                                         &dmin, &next_block, mx, my);
2806                         s->skipdct=0;
2807                     }
2808                 }
2809
2810                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2811
2812                 copy_context_after_encode(s, &best_s, -1);
2813
2814                 pb_bits_count= put_bits_count(&s->pb);
2815                 flush_put_bits(&s->pb);
2816                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2817                 s->pb= backup_s.pb;
2818
2819                 if(s->data_partitioning){
2820                     pb2_bits_count= put_bits_count(&s->pb2);
2821                     flush_put_bits(&s->pb2);
2822                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2823                     s->pb2= backup_s.pb2;
2824
2825                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2826                     flush_put_bits(&s->tex_pb);
2827                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2828                     s->tex_pb= backup_s.tex_pb;
2829                 }
2830                 s->last_bits= put_bits_count(&s->pb);
2831
2832                 if (CONFIG_H263_ENCODER &&
2833                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2834                     ff_h263_update_motion_val(s);
2835
2836                 if(next_block==0){ //FIXME 16 vs linesize16
2837                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2838                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2839                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2840                 }
2841
2842                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2843                     ff_MPV_decode_mb(s, s->block);
2844             } else {
2845                 int motion_x = 0, motion_y = 0;
2846                 s->mv_type=MV_TYPE_16X16;
2847                 // only one MB-Type possible
2848
2849                 switch(mb_type){
2850                 case CANDIDATE_MB_TYPE_INTRA:
2851                     s->mv_dir = 0;
2852                     s->mb_intra= 1;
2853                     motion_x= s->mv[0][0][0] = 0;
2854                     motion_y= s->mv[0][0][1] = 0;
2855                     break;
2856                 case CANDIDATE_MB_TYPE_INTER:
2857                     s->mv_dir = MV_DIR_FORWARD;
2858                     s->mb_intra= 0;
2859                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2860                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2861                     break;
2862                 case CANDIDATE_MB_TYPE_INTER_I:
2863                     s->mv_dir = MV_DIR_FORWARD;
2864                     s->mv_type = MV_TYPE_FIELD;
2865                     s->mb_intra= 0;
2866                     for(i=0; i<2; i++){
2867                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2868                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2869                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2870                     }
2871                     break;
2872                 case CANDIDATE_MB_TYPE_INTER4V:
2873                     s->mv_dir = MV_DIR_FORWARD;
2874                     s->mv_type = MV_TYPE_8X8;
2875                     s->mb_intra= 0;
2876                     for(i=0; i<4; i++){
2877                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2878                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2879                     }
2880                     break;
2881                 case CANDIDATE_MB_TYPE_DIRECT:
2882                     if (CONFIG_MPEG4_ENCODER) {
2883                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2884                         s->mb_intra= 0;
2885                         motion_x=s->b_direct_mv_table[xy][0];
2886                         motion_y=s->b_direct_mv_table[xy][1];
2887                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2888                     }
2889                     break;
2890                 case CANDIDATE_MB_TYPE_DIRECT0:
2891                     if (CONFIG_MPEG4_ENCODER) {
2892                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2893                         s->mb_intra= 0;
2894                         ff_mpeg4_set_direct_mv(s, 0, 0);
2895                     }
2896                     break;
2897                 case CANDIDATE_MB_TYPE_BIDIR:
2898                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2899                     s->mb_intra= 0;
2900                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2901                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2902                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2903                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2904                     break;
2905                 case CANDIDATE_MB_TYPE_BACKWARD:
2906                     s->mv_dir = MV_DIR_BACKWARD;
2907                     s->mb_intra= 0;
2908                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2909                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2910                     break;
2911                 case CANDIDATE_MB_TYPE_FORWARD:
2912                     s->mv_dir = MV_DIR_FORWARD;
2913                     s->mb_intra= 0;
2914                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2915                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2916                     break;
2917                 case CANDIDATE_MB_TYPE_FORWARD_I:
2918                     s->mv_dir = MV_DIR_FORWARD;
2919                     s->mv_type = MV_TYPE_FIELD;
2920                     s->mb_intra= 0;
2921                     for(i=0; i<2; i++){
2922                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2923                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2924                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2925                     }
2926                     break;
2927                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2928                     s->mv_dir = MV_DIR_BACKWARD;
2929                     s->mv_type = MV_TYPE_FIELD;
2930                     s->mb_intra= 0;
2931                     for(i=0; i<2; i++){
2932                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2933                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2934                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2935                     }
2936                     break;
2937                 case CANDIDATE_MB_TYPE_BIDIR_I:
2938                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2939                     s->mv_type = MV_TYPE_FIELD;
2940                     s->mb_intra= 0;
2941                     for(dir=0; dir<2; dir++){
2942                         for(i=0; i<2; i++){
2943                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2944                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2945                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2946                         }
2947                     }
2948                     break;
2949                 default:
2950                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2951                 }
2952
2953                 encode_mb(s, motion_x, motion_y);
2954
2955                 // RAL: Update last macroblock type
2956                 s->last_mv_dir = s->mv_dir;
2957
2958                 if (CONFIG_H263_ENCODER &&
2959                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2960                     ff_h263_update_motion_val(s);
2961
2962                 ff_MPV_decode_mb(s, s->block);
2963             }
2964
2965             /* clean the MV table in IPS frames for direct mode in B frames */
2966             if(s->mb_intra /* && I,P,S_TYPE */){
2967                 s->p_mv_table[xy][0]=0;
2968                 s->p_mv_table[xy][1]=0;
2969             }
2970
2971             if(s->flags&CODEC_FLAG_PSNR){
2972                 int w= 16;
2973                 int h= 16;
2974
2975                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2976                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2977
2978                 s->current_picture.f.error[0] += sse(
2979                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2980                     s->dest[0], w, h, s->linesize);
2981                 s->current_picture.f.error[1] += sse(
2982                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2983                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2984                 s->current_picture.f.error[2] += sse(
2985                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2986                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2987             }
2988             if(s->loop_filter){
2989                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2990                     ff_h263_loop_filter(s);
2991             }
2992             av_dlog(s->avctx, "MB %d %d bits\n",
2993                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
2994         }
2995     }
2996
2997     //not beautiful here but we must write it before flushing so it has to be here
2998     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
2999         ff_msmpeg4_encode_ext_header(s);
3000
3001     write_slice_end(s);
3002
3003     /* Send the last GOB if RTP */
3004     if (s->avctx->rtp_callback) {
3005         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3006         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3007         /* Call the RTP callback to send the last GOB */
3008         emms_c();
3009         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3010     }
3011
3012     return 0;
3013 }
3014
3015 #define MERGE(field) dst->field += src->field; src->field=0
3016 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3017     MERGE(me.scene_change_score);
3018     MERGE(me.mc_mb_var_sum_temp);
3019     MERGE(me.mb_var_sum_temp);
3020 }
3021
3022 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3023     int i;
3024
3025     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3026     MERGE(dct_count[1]);
3027     MERGE(mv_bits);
3028     MERGE(i_tex_bits);
3029     MERGE(p_tex_bits);
3030     MERGE(i_count);
3031     MERGE(f_count);
3032     MERGE(b_count);
3033     MERGE(skip_count);
3034     MERGE(misc_bits);
3035     MERGE(error_count);
3036     MERGE(padding_bug_score);
3037     MERGE(current_picture.f.error[0]);
3038     MERGE(current_picture.f.error[1]);
3039     MERGE(current_picture.f.error[2]);
3040
3041     if(dst->avctx->noise_reduction){
3042         for(i=0; i<64; i++){
3043             MERGE(dct_error_sum[0][i]);
3044             MERGE(dct_error_sum[1][i]);
3045         }
3046     }
3047
3048     assert(put_bits_count(&src->pb) % 8 ==0);
3049     assert(put_bits_count(&dst->pb) % 8 ==0);
3050     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3051     flush_put_bits(&dst->pb);
3052 }
3053
3054 static int estimate_qp(MpegEncContext *s, int dry_run){
3055     if (s->next_lambda){
3056         s->current_picture_ptr->f.quality =
3057         s->current_picture.f.quality = s->next_lambda;
3058         if(!dry_run) s->next_lambda= 0;
3059     } else if (!s->fixed_qscale) {
3060         s->current_picture_ptr->f.quality =
3061         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3062         if (s->current_picture.f.quality < 0)
3063             return -1;
3064     }
3065
3066     if(s->adaptive_quant){
3067         switch(s->codec_id){
3068         case AV_CODEC_ID_MPEG4:
3069             if (CONFIG_MPEG4_ENCODER)
3070                 ff_clean_mpeg4_qscales(s);
3071             break;
3072         case AV_CODEC_ID_H263:
3073         case AV_CODEC_ID_H263P:
3074         case AV_CODEC_ID_FLV1:
3075             if (CONFIG_H263_ENCODER)
3076                 ff_clean_h263_qscales(s);
3077             break;
3078         default:
3079             ff_init_qscale_tab(s);
3080         }
3081
3082         s->lambda= s->lambda_table[0];
3083         //FIXME broken
3084     }else
3085         s->lambda = s->current_picture.f.quality;
3086     update_qscale(s);
3087     return 0;
3088 }
3089
3090 /* must be called before writing the header */
3091 static void set_frame_distances(MpegEncContext * s){
3092     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3093     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3094
3095     if(s->pict_type==AV_PICTURE_TYPE_B){
3096         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3097         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3098     }else{
3099         s->pp_time= s->time - s->last_non_b_time;
3100         s->last_non_b_time= s->time;
3101         assert(s->picture_number==0 || s->pp_time > 0);
3102     }
3103 }
3104
3105 static int encode_picture(MpegEncContext *s, int picture_number)
3106 {
3107     int i;
3108     int bits;
3109     int context_count = s->slice_context_count;
3110
3111     s->picture_number = picture_number;
3112
3113     /* Reset the average MB variance */
3114     s->me.mb_var_sum_temp    =
3115     s->me.mc_mb_var_sum_temp = 0;
3116
3117     /* we need to initialize some time vars before we can encode b-frames */
3118     // RAL: Condition added for MPEG1VIDEO
3119     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3120         set_frame_distances(s);
3121     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3122         ff_set_mpeg4_time(s);
3123
3124     s->me.scene_change_score=0;
3125
3126 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3127
3128     if(s->pict_type==AV_PICTURE_TYPE_I){
3129         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3130         else                        s->no_rounding=0;
3131     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3132         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3133             s->no_rounding ^= 1;
3134     }
3135
3136     if(s->flags & CODEC_FLAG_PASS2){
3137         if (estimate_qp(s,1) < 0)
3138             return -1;
3139         ff_get_2pass_fcode(s);
3140     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3141         if(s->pict_type==AV_PICTURE_TYPE_B)
3142             s->lambda= s->last_lambda_for[s->pict_type];
3143         else
3144             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3145         update_qscale(s);
3146     }
3147
3148     s->mb_intra=0; //for the rate distortion & bit compare functions
3149     for(i=1; i<context_count; i++){
3150         ff_update_duplicate_context(s->thread_context[i], s);
3151     }
3152
3153     if(ff_init_me(s)<0)
3154         return -1;
3155
3156     /* Estimate motion for every MB */
3157     if(s->pict_type != AV_PICTURE_TYPE_I){
3158         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3159         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3160         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3161             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3162                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3163             }
3164         }
3165
3166         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3167     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3168         /* I-Frame */
3169         for(i=0; i<s->mb_stride*s->mb_height; i++)
3170             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3171
3172         if(!s->fixed_qscale){
3173             /* finding spatial complexity for I-frame rate control */
3174             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3175         }
3176     }
3177     for(i=1; i<context_count; i++){
3178         merge_context_after_me(s, s->thread_context[i]);
3179     }
3180     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3181     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3182     emms_c();
3183
3184     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3185         s->pict_type= AV_PICTURE_TYPE_I;
3186         for(i=0; i<s->mb_stride*s->mb_height; i++)
3187             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3188         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3189                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3190     }
3191
3192     if(!s->umvplus){
3193         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3194             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3195
3196             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3197                 int a,b;
3198                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3199                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3200                 s->f_code= FFMAX3(s->f_code, a, b);
3201             }
3202
3203             ff_fix_long_p_mvs(s);
3204             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3205             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3206                 int j;
3207                 for(i=0; i<2; i++){
3208                     for(j=0; j<2; j++)
3209                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3210                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3211                 }
3212             }
3213         }
3214
3215         if(s->pict_type==AV_PICTURE_TYPE_B){
3216             int a, b;
3217
3218             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3219             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3220             s->f_code = FFMAX(a, b);
3221
3222             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3223             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3224             s->b_code = FFMAX(a, b);
3225
3226             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3227             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3228             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3229             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3230             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3231                 int dir, j;
3232                 for(dir=0; dir<2; dir++){
3233                     for(i=0; i<2; i++){
3234                         for(j=0; j<2; j++){
3235                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3236                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3237                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3238                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3239                         }
3240                     }
3241                 }
3242             }
3243         }
3244     }
3245
3246     if (estimate_qp(s, 0) < 0)
3247         return -1;
3248
3249     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3250         s->qscale= 3; //reduce clipping problems
3251
3252     if (s->out_format == FMT_MJPEG) {
3253         /* for mjpeg, we do include qscale in the matrix */
3254         for(i=1;i<64;i++){
3255             int j= s->dsp.idct_permutation[i];
3256
3257             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3258         }
3259         s->y_dc_scale_table=
3260         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3261         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3262         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3263                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3264         s->qscale= 8;
3265     }
3266
3267     //FIXME var duplication
3268     s->current_picture_ptr->f.key_frame =
3269     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3270     s->current_picture_ptr->f.pict_type =
3271     s->current_picture.f.pict_type = s->pict_type;
3272
3273     if (s->current_picture.f.key_frame)
3274         s->picture_in_gop_number=0;
3275
3276     s->last_bits= put_bits_count(&s->pb);
3277     switch(s->out_format) {
3278     case FMT_MJPEG:
3279         if (CONFIG_MJPEG_ENCODER)
3280             ff_mjpeg_encode_picture_header(s);
3281         break;
3282     case FMT_H261:
3283         if (CONFIG_H261_ENCODER)
3284             ff_h261_encode_picture_header(s, picture_number);
3285         break;
3286     case FMT_H263:
3287         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3288             ff_wmv2_encode_picture_header(s, picture_number);
3289         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3290             ff_msmpeg4_encode_picture_header(s, picture_number);
3291         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3292             ff_mpeg4_encode_picture_header(s, picture_number);
3293         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3294             ff_rv10_encode_picture_header(s, picture_number);
3295         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3296             ff_rv20_encode_picture_header(s, picture_number);
3297         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3298             ff_flv_encode_picture_header(s, picture_number);
3299         else if (CONFIG_H263_ENCODER)
3300             ff_h263_encode_picture_header(s, picture_number);
3301         break;
3302     case FMT_MPEG1:
3303         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3304             ff_mpeg1_encode_picture_header(s, picture_number);
3305         break;
3306     case FMT_H264:
3307         break;
3308     default:
3309         assert(0);
3310     }
3311     bits= put_bits_count(&s->pb);
3312     s->header_bits= bits - s->last_bits;
3313
3314     for(i=1; i<context_count; i++){
3315         update_duplicate_context_after_me(s->thread_context[i], s);
3316     }
3317     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3318     for(i=1; i<context_count; i++){
3319         merge_context_after_encode(s, s->thread_context[i]);
3320     }
3321     emms_c();
3322     return 0;
3323 }
3324
3325 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3326     const int intra= s->mb_intra;
3327     int i;
3328
3329     s->dct_count[intra]++;
3330
3331     for(i=0; i<64; i++){
3332         int level= block[i];
3333
3334         if(level){
3335             if(level>0){
3336                 s->dct_error_sum[intra][i] += level;
3337                 level -= s->dct_offset[intra][i];
3338                 if(level<0) level=0;
3339             }else{
3340                 s->dct_error_sum[intra][i] -= level;
3341                 level += s->dct_offset[intra][i];
3342                 if(level>0) level=0;
3343             }
3344             block[i]= level;
3345         }
3346     }
3347 }
3348
3349 static int dct_quantize_trellis_c(MpegEncContext *s,
3350                                   DCTELEM *block, int n,
3351                                   int qscale, int *overflow){
3352     const int *qmat;
3353     const uint8_t *scantable= s->intra_scantable.scantable;
3354     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3355     int max=0;
3356     unsigned int threshold1, threshold2;
3357     int bias=0;
3358     int run_tab[65];
3359     int level_tab[65];
3360     int score_tab[65];
3361     int survivor[65];
3362     int survivor_count;
3363     int last_run=0;
3364     int last_level=0;
3365     int last_score= 0;
3366     int last_i;
3367     int coeff[2][64];
3368     int coeff_count[64];
3369     int qmul, qadd, start_i, last_non_zero, i, dc;
3370     const int esc_length= s->ac_esc_length;
3371     uint8_t * length;
3372     uint8_t * last_length;
3373     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3374
3375     s->dsp.fdct (block);
3376
3377     if(s->dct_error_sum)
3378         s->denoise_dct(s, block);
3379     qmul= qscale*16;
3380     qadd= ((qscale-1)|1)*8;
3381
3382     if (s->mb_intra) {
3383         int q;
3384         if (!s->h263_aic) {
3385             if (n < 4)
3386                 q = s->y_dc_scale;
3387             else
3388                 q = s->c_dc_scale;
3389             q = q << 3;
3390         } else{
3391             /* For AIC we skip quant/dequant of INTRADC */
3392             q = 1 << 3;
3393             qadd=0;
3394         }
3395
3396         /* note: block[0] is assumed to be positive */
3397         block[0] = (block[0] + (q >> 1)) / q;
3398         start_i = 1;
3399         last_non_zero = 0;
3400         qmat = s->q_intra_matrix[qscale];
3401         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3402             bias= 1<<(QMAT_SHIFT-1);
3403         length     = s->intra_ac_vlc_length;
3404         last_length= s->intra_ac_vlc_last_length;
3405     } else {
3406         start_i = 0;
3407         last_non_zero = -1;
3408         qmat = s->q_inter_matrix[qscale];
3409         length     = s->inter_ac_vlc_length;
3410         last_length= s->inter_ac_vlc_last_length;
3411     }
3412     last_i= start_i;
3413
3414     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3415     threshold2= (threshold1<<1);
3416
3417     for(i=63; i>=start_i; i--) {
3418         const int j = scantable[i];
3419         int level = block[j] * qmat[j];
3420
3421         if(((unsigned)(level+threshold1))>threshold2){
3422             last_non_zero = i;
3423             break;
3424         }
3425     }
3426
3427     for(i=start_i; i<=last_non_zero; i++) {
3428         const int j = scantable[i];
3429         int level = block[j] * qmat[j];
3430
3431 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3432 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3433         if(((unsigned)(level+threshold1))>threshold2){
3434             if(level>0){
3435                 level= (bias + level)>>QMAT_SHIFT;
3436                 coeff[0][i]= level;
3437                 coeff[1][i]= level-1;
3438 //                coeff[2][k]= level-2;
3439             }else{
3440                 level= (bias - level)>>QMAT_SHIFT;
3441                 coeff[0][i]= -level;
3442                 coeff[1][i]= -level+1;
3443 //                coeff[2][k]= -level+2;
3444             }
3445             coeff_count[i]= FFMIN(level, 2);
3446             assert(coeff_count[i]);
3447             max |=level;
3448         }else{
3449             coeff[0][i]= (level>>31)|1;
3450             coeff_count[i]= 1;
3451         }
3452     }
3453
3454     *overflow= s->max_qcoeff < max; //overflow might have happened
3455
3456     if(last_non_zero < start_i){
3457         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3458         return last_non_zero;
3459     }
3460
3461     score_tab[start_i]= 0;
3462     survivor[0]= start_i;
3463     survivor_count= 1;
3464
3465     for(i=start_i; i<=last_non_zero; i++){
3466         int level_index, j, zero_distortion;
3467         int dct_coeff= FFABS(block[ scantable[i] ]);
3468         int best_score=256*256*256*120;
3469
3470         if (s->dsp.fdct == ff_fdct_ifast)
3471             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3472         zero_distortion= dct_coeff*dct_coeff;
3473
3474         for(level_index=0; level_index < coeff_count[i]; level_index++){
3475             int distortion;
3476             int level= coeff[level_index][i];
3477             const int alevel= FFABS(level);
3478             int unquant_coeff;
3479
3480             assert(level);
3481
3482             if(s->out_format == FMT_H263){
3483                 unquant_coeff= alevel*qmul + qadd;
3484             }else{ //MPEG1
3485                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3486                 if(s->mb_intra){
3487                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3488                         unquant_coeff =   (unquant_coeff - 1) | 1;
3489                 }else{
3490                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3491                         unquant_coeff =   (unquant_coeff - 1) | 1;
3492                 }
3493                 unquant_coeff<<= 3;
3494             }
3495
3496             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3497             level+=64;
3498             if((level&(~127)) == 0){
3499                 for(j=survivor_count-1; j>=0; j--){
3500                     int run= i - survivor[j];
3501                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3502                     score += score_tab[i-run];
3503
3504                     if(score < best_score){
3505                         best_score= score;
3506                         run_tab[i+1]= run;
3507                         level_tab[i+1]= level-64;
3508                     }
3509                 }
3510
3511                 if(s->out_format == FMT_H263){
3512                     for(j=survivor_count-1; j>=0; j--){
3513                         int run= i - survivor[j];
3514                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3515                         score += score_tab[i-run];
3516                         if(score < last_score){
3517                             last_score= score;
3518                             last_run= run;
3519                             last_level= level-64;
3520                             last_i= i+1;
3521                         }
3522                     }
3523                 }
3524             }else{
3525                 distortion += esc_length*lambda;
3526                 for(j=survivor_count-1; j>=0; j--){
3527                     int run= i - survivor[j];
3528                     int score= distortion + score_tab[i-run];
3529
3530                     if(score < best_score){
3531                         best_score= score;
3532                         run_tab[i+1]= run;
3533                         level_tab[i+1]= level-64;
3534                     }
3535                 }
3536
3537                 if(s->out_format == FMT_H263){
3538                   for(j=survivor_count-1; j>=0; j--){
3539                         int run= i - survivor[j];
3540                         int score= distortion + score_tab[i-run];
3541                         if(score < last_score){
3542                             last_score= score;
3543                             last_run= run;
3544                             last_level= level-64;
3545                             last_i= i+1;
3546                         }
3547                     }
3548                 }
3549             }
3550         }
3551
3552         score_tab[i+1]= best_score;
3553
3554         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3555         if(last_non_zero <= 27){
3556             for(; survivor_count; survivor_count--){
3557                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3558                     break;
3559             }
3560         }else{
3561             for(; survivor_count; survivor_count--){
3562                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3563                     break;
3564             }
3565         }
3566
3567         survivor[ survivor_count++ ]= i+1;
3568     }
3569
3570     if(s->out_format != FMT_H263){
3571         last_score= 256*256*256*120;
3572         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3573             int score= score_tab[i];
3574             if(i) score += lambda*2; //FIXME exacter?
3575
3576             if(score < last_score){
3577                 last_score= score;
3578                 last_i= i;
3579                 last_level= level_tab[i];
3580                 last_run= run_tab[i];
3581             }
3582         }
3583     }
3584
3585     s->coded_score[n] = last_score;
3586
3587     dc= FFABS(block[0]);
3588     last_non_zero= last_i - 1;
3589     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3590
3591     if(last_non_zero < start_i)
3592         return last_non_zero;
3593
3594     if(last_non_zero == 0 && start_i == 0){
3595         int best_level= 0;
3596         int best_score= dc * dc;
3597
3598         for(i=0; i<coeff_count[0]; i++){
3599             int level= coeff[i][0];
3600             int alevel= FFABS(level);
3601             int unquant_coeff, score, distortion;
3602
3603             if(s->out_format == FMT_H263){
3604                     unquant_coeff= (alevel*qmul + qadd)>>3;
3605             }else{ //MPEG1
3606                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3607                     unquant_coeff =   (unquant_coeff - 1) | 1;
3608             }
3609             unquant_coeff = (unquant_coeff + 4) >> 3;
3610             unquant_coeff<<= 3 + 3;
3611
3612             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3613             level+=64;
3614             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3615             else                    score= distortion + esc_length*lambda;
3616
3617             if(score < best_score){
3618                 best_score= score;
3619                 best_level= level - 64;
3620             }
3621         }
3622         block[0]= best_level;
3623         s->coded_score[n] = best_score - dc*dc;
3624         if(best_level == 0) return -1;
3625         else                return last_non_zero;
3626     }
3627
3628     i= last_i;
3629     assert(last_level);
3630
3631     block[ perm_scantable[last_non_zero] ]= last_level;
3632     i -= last_run + 1;
3633
3634     for(; i>start_i; i -= run_tab[i] + 1){
3635         block[ perm_scantable[i-1] ]= level_tab[i];
3636     }
3637
3638     return last_non_zero;
3639 }
3640
3641 //#define REFINE_STATS 1
3642 static int16_t basis[64][64];
3643
3644 static void build_basis(uint8_t *perm){
3645     int i, j, x, y;
3646     emms_c();
3647     for(i=0; i<8; i++){
3648         for(j=0; j<8; j++){
3649             for(y=0; y<8; y++){
3650                 for(x=0; x<8; x++){
3651                     double s= 0.25*(1<<BASIS_SHIFT);
3652                     int index= 8*i + j;
3653                     int perm_index= perm[index];
3654                     if(i==0) s*= sqrt(0.5);
3655                     if(j==0) s*= sqrt(0.5);
3656                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3657                 }
3658             }
3659         }
3660     }
3661 }
3662
3663 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3664                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3665                         int n, int qscale){
3666     int16_t rem[64];
3667     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3668     const uint8_t *scantable= s->intra_scantable.scantable;
3669     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3670 //    unsigned int threshold1, threshold2;
3671 //    int bias=0;
3672     int run_tab[65];
3673     int prev_run=0;
3674     int prev_level=0;
3675     int qmul, qadd, start_i, last_non_zero, i, dc;
3676     uint8_t * length;
3677     uint8_t * last_length;
3678     int lambda;
3679     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3680 #ifdef REFINE_STATS
3681 static int count=0;
3682 static int after_last=0;
3683 static int to_zero=0;
3684 static int from_zero=0;
3685 static int raise=0;
3686 static int lower=0;
3687 static int messed_sign=0;
3688 #endif
3689
3690     if(basis[0][0] == 0)
3691         build_basis(s->dsp.idct_permutation);
3692
3693     qmul= qscale*2;
3694     qadd= (qscale-1)|1;
3695     if (s->mb_intra) {
3696         if (!s->h263_aic) {
3697             if (n < 4)
3698                 q = s->y_dc_scale;
3699             else
3700                 q = s->c_dc_scale;
3701         } else{
3702             /* For AIC we skip quant/dequant of INTRADC */
3703             q = 1;
3704             qadd=0;
3705         }
3706         q <<= RECON_SHIFT-3;
3707         /* note: block[0] is assumed to be positive */
3708         dc= block[0]*q;
3709 //        block[0] = (block[0] + (q >> 1)) / q;
3710         start_i = 1;
3711 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3712 //            bias= 1<<(QMAT_SHIFT-1);
3713         length     = s->intra_ac_vlc_length;
3714         last_length= s->intra_ac_vlc_last_length;
3715     } else {
3716         dc= 0;
3717         start_i = 0;
3718         length     = s->inter_ac_vlc_length;
3719         last_length= s->inter_ac_vlc_last_length;
3720     }
3721     last_non_zero = s->block_last_index[n];
3722
3723 #ifdef REFINE_STATS
3724 {START_TIMER
3725 #endif
3726     dc += (1<<(RECON_SHIFT-1));
3727     for(i=0; i<64; i++){
3728         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3729     }
3730 #ifdef REFINE_STATS
3731 STOP_TIMER("memset rem[]")}
3732 #endif
3733     sum=0;
3734     for(i=0; i<64; i++){
3735         int one= 36;
3736         int qns=4;
3737         int w;
3738
3739         w= FFABS(weight[i]) + qns*one;
3740         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3741
3742         weight[i] = w;
3743 //        w=weight[i] = (63*qns + (w/2)) / w;
3744
3745         assert(w>0);
3746         assert(w<(1<<6));
3747         sum += w*w;
3748     }
3749     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3750 #ifdef REFINE_STATS
3751 {START_TIMER
3752 #endif
3753     run=0;
3754     rle_index=0;
3755     for(i=start_i; i<=last_non_zero; i++){
3756         int j= perm_scantable[i];
3757         const int level= block[j];
3758         int coeff;
3759
3760         if(level){
3761             if(level<0) coeff= qmul*level - qadd;
3762             else        coeff= qmul*level + qadd;
3763             run_tab[rle_index++]=run;
3764             run=0;
3765
3766             s->dsp.add_8x8basis(rem, basis[j], coeff);
3767         }else{
3768             run++;
3769         }
3770     }
3771 #ifdef REFINE_STATS
3772 if(last_non_zero>0){
3773 STOP_TIMER("init rem[]")
3774 }
3775 }
3776
3777 {START_TIMER
3778 #endif
3779     for(;;){
3780         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3781         int best_coeff=0;
3782         int best_change=0;
3783         int run2, best_unquant_change=0, analyze_gradient;
3784 #ifdef REFINE_STATS
3785 {START_TIMER
3786 #endif
3787         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3788
3789         if(analyze_gradient){
3790 #ifdef REFINE_STATS
3791 {START_TIMER
3792 #endif
3793             for(i=0; i<64; i++){
3794                 int w= weight[i];
3795
3796                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3797             }
3798 #ifdef REFINE_STATS
3799 STOP_TIMER("rem*w*w")}
3800 {START_TIMER
3801 #endif
3802             s->dsp.fdct(d1);
3803 #ifdef REFINE_STATS
3804 STOP_TIMER("dct")}
3805 #endif
3806         }
3807
3808         if(start_i){
3809             const int level= block[0];
3810             int change, old_coeff;
3811
3812             assert(s->mb_intra);
3813
3814             old_coeff= q*level;
3815
3816             for(change=-1; change<=1; change+=2){
3817                 int new_level= level + change;
3818                 int score, new_coeff;
3819
3820                 new_coeff= q*new_level;
3821                 if(new_coeff >= 2048 || new_coeff < 0)
3822                     continue;
3823
3824                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3825                 if(score<best_score){
3826                     best_score= score;
3827                     best_coeff= 0;
3828                     best_change= change;
3829                     best_unquant_change= new_coeff - old_coeff;
3830                 }
3831             }
3832         }
3833
3834         run=0;
3835         rle_index=0;
3836         run2= run_tab[rle_index++];
3837         prev_level=0;
3838         prev_run=0;
3839
3840         for(i=start_i; i<64; i++){
3841             int j= perm_scantable[i];
3842             const int level= block[j];
3843             int change, old_coeff;
3844
3845             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3846                 break;
3847
3848             if(level){
3849                 if(level<0) old_coeff= qmul*level - qadd;
3850                 else        old_coeff= qmul*level + qadd;
3851                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3852             }else{
3853                 old_coeff=0;
3854                 run2--;
3855                 assert(run2>=0 || i >= last_non_zero );
3856             }
3857
3858             for(change=-1; change<=1; change+=2){
3859                 int new_level= level + change;
3860                 int score, new_coeff, unquant_change;
3861
3862                 score=0;
3863                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3864                    continue;
3865
3866                 if(new_level){
3867                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3868                     else            new_coeff= qmul*new_level + qadd;
3869                     if(new_coeff >= 2048 || new_coeff <= -2048)
3870                         continue;
3871                     //FIXME check for overflow
3872
3873                     if(level){
3874                         if(level < 63 && level > -63){
3875                             if(i < last_non_zero)
3876                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3877                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3878                             else
3879                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3880                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3881                         }
3882                     }else{
3883                         assert(FFABS(new_level)==1);
3884
3885                         if(analyze_gradient){
3886                             int g= d1[ scantable[i] ];
3887                             if(g && (g^new_level) >= 0)
3888                                 continue;
3889                         }
3890
3891                         if(i < last_non_zero){
3892                             int next_i= i + run2 + 1;
3893                             int next_level= block[ perm_scantable[next_i] ] + 64;
3894
3895                             if(next_level&(~127))
3896                                 next_level= 0;
3897
3898                             if(next_i < last_non_zero)
3899                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3900                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3901                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3902                             else
3903                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3904                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3905                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3906                         }else{
3907                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3908                             if(prev_level){
3909                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3910                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3911                             }
3912                         }
3913                     }
3914                 }else{
3915                     new_coeff=0;
3916                     assert(FFABS(level)==1);
3917
3918                     if(i < last_non_zero){
3919                         int next_i= i + run2 + 1;
3920                         int next_level= block[ perm_scantable[next_i] ] + 64;
3921
3922                         if(next_level&(~127))
3923                             next_level= 0;
3924
3925                         if(next_i < last_non_zero)
3926                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3927                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3928                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3929                         else
3930                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3931                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3932                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3933                     }else{
3934                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3935                         if(prev_level){
3936                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3937                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3938                         }
3939                     }
3940                 }
3941
3942                 score *= lambda;
3943
3944                 unquant_change= new_coeff - old_coeff;
3945                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3946
3947                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3948                 if(score<best_score){
3949                     best_score= score;
3950                     best_coeff= i;
3951                     best_change= change;
3952                     best_unquant_change= unquant_change;
3953                 }
3954             }
3955             if(level){
3956                 prev_level= level + 64;
3957                 if(prev_level&(~127))
3958                     prev_level= 0;
3959                 prev_run= run;
3960                 run=0;
3961             }else{
3962                 run++;
3963             }
3964         }
3965 #ifdef REFINE_STATS
3966 STOP_TIMER("iterative step")}
3967 #endif
3968
3969         if(best_change){
3970             int j= perm_scantable[ best_coeff ];
3971
3972             block[j] += best_change;
3973
3974             if(best_coeff > last_non_zero){
3975                 last_non_zero= best_coeff;
3976                 assert(block[j]);
3977 #ifdef REFINE_STATS
3978 after_last++;
3979 #endif
3980             }else{
3981 #ifdef REFINE_STATS
3982 if(block[j]){
3983     if(block[j] - best_change){
3984         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3985             raise++;
3986         }else{
3987             lower++;
3988         }
3989     }else{
3990         from_zero++;
3991     }
3992 }else{
3993     to_zero++;
3994 }
3995 #endif
3996                 for(; last_non_zero>=start_i; last_non_zero--){
3997                     if(block[perm_scantable[last_non_zero]])
3998                         break;
3999                 }
4000             }
4001 #ifdef REFINE_STATS
4002 count++;
4003 if(256*256*256*64 % count == 0){
4004     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4005 }
4006 #endif
4007             run=0;
4008             rle_index=0;
4009             for(i=start_i; i<=last_non_zero; i++){
4010                 int j= perm_scantable[i];
4011                 const int level= block[j];
4012
4013                  if(level){
4014                      run_tab[rle_index++]=run;
4015                      run=0;
4016                  }else{
4017                      run++;
4018                  }
4019             }
4020
4021             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4022         }else{
4023             break;
4024         }
4025     }
4026 #ifdef REFINE_STATS
4027 if(last_non_zero>0){
4028 STOP_TIMER("iterative search")
4029 }
4030 }
4031 #endif
4032
4033     return last_non_zero;
4034 }
4035
4036 int ff_dct_quantize_c(MpegEncContext *s,
4037                         DCTELEM *block, int n,
4038                         int qscale, int *overflow)
4039 {
4040     int i, j, level, last_non_zero, q, start_i;
4041     const int *qmat;
4042     const uint8_t *scantable= s->intra_scantable.scantable;
4043     int bias;
4044     int max=0;
4045     unsigned int threshold1, threshold2;
4046
4047     s->dsp.fdct (block);
4048
4049     if(s->dct_error_sum)
4050         s->denoise_dct(s, block);
4051
4052     if (s->mb_intra) {
4053         if (!s->h263_aic) {
4054             if (n < 4)
4055                 q = s->y_dc_scale;
4056             else
4057                 q = s->c_dc_scale;
4058             q = q << 3;
4059         } else
4060             /* For AIC we skip quant/dequant of INTRADC */
4061             q = 1 << 3;
4062
4063         /* note: block[0] is assumed to be positive */
4064         block[0] = (block[0] + (q >> 1)) / q;
4065         start_i = 1;
4066         last_non_zero = 0;
4067         qmat = s->q_intra_matrix[qscale];
4068         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4069     } else {
4070         start_i = 0;
4071         last_non_zero = -1;
4072         qmat = s->q_inter_matrix[qscale];
4073         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4074     }
4075     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4076     threshold2= (threshold1<<1);
4077     for(i=63;i>=start_i;i--) {
4078         j = scantable[i];
4079         level = block[j] * qmat[j];
4080
4081         if(((unsigned)(level+threshold1))>threshold2){
4082             last_non_zero = i;
4083             break;
4084         }else{
4085             block[j]=0;
4086         }
4087     }
4088     for(i=start_i; i<=last_non_zero; i++) {
4089         j = scantable[i];
4090         level = block[j] * qmat[j];
4091
4092 //        if(   bias+level >= (1<<QMAT_SHIFT)
4093 //           || bias-level >= (1<<QMAT_SHIFT)){
4094         if(((unsigned)(level+threshold1))>threshold2){
4095             if(level>0){
4096                 level= (bias + level)>>QMAT_SHIFT;
4097                 block[j]= level;
4098             }else{
4099                 level= (bias - level)>>QMAT_SHIFT;
4100                 block[j]= -level;
4101             }
4102             max |=level;
4103         }else{
4104             block[j]=0;
4105         }
4106     }
4107     *overflow= s->max_qcoeff < max; //overflow might have happened
4108
4109     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4110     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4111         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4112
4113     return last_non_zero;
4114 }
4115
/* Byte offset of member x inside MpegEncContext, used by the option tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags: video parameter | encoding parameter. The replacement list is
 * parenthesized so that it stays a single operand in any expression. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4118 static const AVOption h263_options[] = {
4119     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4120     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4121     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4122     FF_MPV_COMMON_OPTS
4123     { NULL },
4124 };
4125
4126 static const AVClass h263_class = {
4127     .class_name = "H.263 encoder",
4128     .item_name  = av_default_item_name,
4129     .option     = h263_options,
4130     .version    = LIBAVUTIL_VERSION_INT,
4131 };
4132
4133 AVCodec ff_h263_encoder = {
4134     .name           = "h263",
4135     .type           = AVMEDIA_TYPE_VIDEO,
4136     .id             = AV_CODEC_ID_H263,
4137     .priv_data_size = sizeof(MpegEncContext),
4138     .init           = ff_MPV_encode_init,
4139     .encode2        = ff_MPV_encode_picture,
4140     .close          = ff_MPV_encode_end,
4141     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4142     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4143     .priv_class     = &h263_class,
4144 };
4145
4146 static const AVOption h263p_options[] = {
4147     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4148     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4149     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4150     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4151     FF_MPV_COMMON_OPTS
4152     { NULL },
4153 };
4154 static const AVClass h263p_class = {
4155     .class_name = "H.263p encoder",
4156     .item_name  = av_default_item_name,
4157     .option     = h263p_options,
4158     .version    = LIBAVUTIL_VERSION_INT,
4159 };
4160
4161 AVCodec ff_h263p_encoder = {
4162     .name           = "h263p",
4163     .type           = AVMEDIA_TYPE_VIDEO,
4164     .id             = AV_CODEC_ID_H263P,
4165     .priv_data_size = sizeof(MpegEncContext),
4166     .init           = ff_MPV_encode_init,
4167     .encode2        = ff_MPV_encode_picture,
4168     .close          = ff_MPV_encode_end,
4169     .capabilities   = CODEC_CAP_SLICE_THREADS,
4170     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4171     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4172     .priv_class     = &h263p_class,
4173 };
4174
4175 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4176
4177 AVCodec ff_msmpeg4v2_encoder = {
4178     .name           = "msmpeg4v2",
4179     .type           = AVMEDIA_TYPE_VIDEO,
4180     .id             = AV_CODEC_ID_MSMPEG4V2,
4181     .priv_data_size = sizeof(MpegEncContext),
4182     .init           = ff_MPV_encode_init,
4183     .encode2        = ff_MPV_encode_picture,
4184     .close          = ff_MPV_encode_end,
4185     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4186     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4187     .priv_class     = &msmpeg4v2_class,
4188 };
4189
4190 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4191
4192 AVCodec ff_msmpeg4v3_encoder = {
4193     .name           = "msmpeg4",
4194     .type           = AVMEDIA_TYPE_VIDEO,
4195     .id             = AV_CODEC_ID_MSMPEG4V3,
4196     .priv_data_size = sizeof(MpegEncContext),
4197     .init           = ff_MPV_encode_init,
4198     .encode2        = ff_MPV_encode_picture,
4199     .close          = ff_MPV_encode_end,
4200     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4201     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4202     .priv_class     = &msmpeg4v3_class,
4203 };
4204
4205 FF_MPV_GENERIC_CLASS(wmv1)
4206
4207 AVCodec ff_wmv1_encoder = {
4208     .name           = "wmv1",
4209     .type           = AVMEDIA_TYPE_VIDEO,
4210     .id             = AV_CODEC_ID_WMV1,
4211     .priv_data_size = sizeof(MpegEncContext),
4212     .init           = ff_MPV_encode_init,
4213     .encode2        = ff_MPV_encode_picture,
4214     .close          = ff_MPV_encode_end,
4215     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4216     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4217     .priv_class     = &wmv1_class,
4218 };