git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of Libav.
   9  *
  10  * Libav is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * Libav is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with Libav; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "avcodec.h"
  38 #include "dct.h"
  39 #include "dsputil.h"
  40 #include "mpeg12.h"
  41 #include "mpegvideo.h"
  42 #include "h261.h"
  43 #include "h263.h"
  44 #include "mathops.h"
  45 #include "mjpegenc.h"
  46 #include "msmpeg4.h"
  47 #include "faandct.h"
  48 #include "thread.h"
  49 #include "aandcttab.h"
  50 #include "flv.h"
  51 #include "mpeg4video.h"
  52 #include "internal.h"
  53 #include "bytestream.h"
  54 #include <limits.h>
  55
/* Prototypes for file-local (static) functions, declared up front so they
 * can be referenced ahead of their definitions. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* Default tables installed by MPV_encode_defaults(): default_mv_penalty is
 * assigned to s->me.mv_penalty and default_fcode_tab to s->fcode_tab. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

/* Generic option table: the FF_MPV_COMMON_OPTS entries followed by a
 * { NULL } terminator entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
  69
  70 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
  71                        uint16_t (*qmat16)[2][64],
  72                        const uint16_t *quant_matrix,
  73                        int bias, int qmin, int qmax, int intra)
  74 {
  75     int qscale;
  76     int shift = 0;
  77
  78     for (qscale = qmin; qscale <= qmax; qscale++) {
  79         int i;
  80         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
  81             dsp->fdct == ff_jpeg_fdct_islow_10 ||
  82             dsp->fdct == ff_faandct) {
  83             for (i = 0; i < 64; i++) {
  84                 const int j = dsp->idct_permutation[i];
  85                 /* 16 <= qscale * quant_matrix[i] <= 7905
  86                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
  87                  *             19952 <=              x  <= 249205026
  88                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
  89                  *           3444240 >= (1 << 36) / (x) >= 275 */
  90
  91                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
  92                                         (qscale * quant_matrix[j]));
  93             }
  94         } else if (dsp->fdct == ff_fdct_ifast) {
  95             for (i = 0; i < 64; i++) {
  96                 const int j = dsp->idct_permutation[i];
  97                 /* 16 <= qscale * quant_matrix[i] <= 7905
  98                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
  99                  *             19952 <=              x  <= 249205026
 100                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 101                  *           3444240 >= (1 << 36) / (x) >= 275 */
 102
 103                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
 104                                         (ff_aanscales[i] * qscale *
 105                                          quant_matrix[j]));
 106             }
 107         } else {
 108             for (i = 0; i < 64; i++) {
 109                 const int j = dsp->idct_permutation[i];
 110                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 111                  * Assume x = qscale * quant_matrix[i]
 112                  * So             16 <=              x  <= 7905
 113                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 114                  * so          32768 >= (1 << 19) / (x) >= 67 */
 115                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
 116                                         (qscale * quant_matrix[j]));
 117                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 118                 //                    (qscale * quant_matrix[i]);
 119                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
 120                                        (qscale * quant_matrix[j]);
 121
 122                 if (qmat16[qscale][0][i] == 0 ||
 123                     qmat16[qscale][0][i] == 128 * 256)
 124                     qmat16[qscale][0][i] = 128 * 256 - 1;
 125                 qmat16[qscale][1][i] =
 126                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 127                                 qmat16[qscale][0][i]);
 128             }
 129         }
 130
 131         for (i = intra; i < 64; i++) {
 132             int64_t max = 8191;
 133             if (dsp->fdct == ff_fdct_ifast) {
 134                 max = (8191LL * ff_aanscales[i]) >> 14;
 135             }
 136             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 137                 shift++;
 138             }
 139         }
 140     }
 141     if (shift) {
 142         av_log(NULL, AV_LOG_INFO,
 143                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 144                QMAT_SHIFT - shift);
 145     }
 146 }
 147
 148 static inline void update_qscale(MpegEncContext *s)
 149 {
 150     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 151                 (FF_LAMBDA_SHIFT + 7);
 152     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 153
 154     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 155                  FF_LAMBDA_SHIFT;
 156 }
 157
 158 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 159 {
 160     int i;
 161
 162     if (matrix) {
 163         put_bits(pb, 1, 1);
 164         for (i = 0; i < 64; i++) {
 165             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 166         }
 167     } else
 168         put_bits(pb, 1, 0);
 169 }
 170
 171 /**
 172  * init s->current_picture.qscale_table from s->lambda_table
 173  */
 174 void ff_init_qscale_tab(MpegEncContext *s)
 175 {
 176     int8_t * const qscale_table = s->current_picture.qscale_table;
 177     int i;
 178
 179     for (i = 0; i < s->mb_num; i++) {
 180         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 181         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 182         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 183                                                   s->avctx->qmax);
 184     }
 185 }
 186
 187 static void update_duplicate_context_after_me(MpegEncContext *dst,
 188                                               MpegEncContext *src)
 189 {
 190 #define COPY(a) dst->a= src->a
 191     COPY(pict_type);
 192     COPY(current_picture);
 193     COPY(f_code);
 194     COPY(b_code);
 195     COPY(qscale);
 196     COPY(lambda);
 197     COPY(lambda2);
 198     COPY(picture_in_gop_number);
 199     COPY(gop_picture_number);
 200     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 201     COPY(progressive_frame);    // FIXME don't set in encode_header
 202     COPY(partitioned_frame);    // FIXME don't set in encode_header
 203 #undef COPY
 204 }
 205
 206 /**
 207  * Set the given MpegEncContext to defaults for encoding.
 208  * the changed fields will not depend upon the prior state of the MpegEncContext.
 209  */
 210 static void MPV_encode_defaults(MpegEncContext *s)
 211 {
 212     int i;
 213     ff_MPV_common_defaults(s);
 214
 215     for (i = -16; i < 16; i++) {
 216         default_fcode_tab[i + MAX_MV] = 1;
 217     }
 218     s->me.mv_penalty = default_mv_penalty;
 219     s->fcode_tab     = default_fcode_tab;
 220
 221     s->input_picture_number  = 0;
 222     s->picture_in_gop_number = 0;
 223 }
 224
 225 /* init video encoder */
 226 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
 227 {
 228     MpegEncContext *s = avctx->priv_data;
 229     int i, ret;
 230
 231     MPV_encode_defaults(s);
 232
 233     switch (avctx->codec_id) {
 234     case AV_CODEC_ID_MPEG2VIDEO:
 235         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 236             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 237             av_log(avctx, AV_LOG_ERROR,
 238                    "only YUV420 and YUV422 are supported\n");
 239             return -1;
 240         }
 241         break;
 242     case AV_CODEC_ID_MJPEG:
 243         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
 244             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
 245             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 246               avctx->pix_fmt != AV_PIX_FMT_YUV422P) ||
 247              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
 248             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 249             return -1;
 250         }
 251         break;
 252     default:
 253         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 254             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 255             return -1;
 256         }
 257     }
 258
 259     switch (avctx->pix_fmt) {
 260     case AV_PIX_FMT_YUVJ422P:
 261     case AV_PIX_FMT_YUV422P:
 262         s->chroma_format = CHROMA_422;
 263         break;
 264     case AV_PIX_FMT_YUVJ420P:
 265     case AV_PIX_FMT_YUV420P:
 266     default:
 267         s->chroma_format = CHROMA_420;
 268         break;
 269     }
 270
 271     s->bit_rate = avctx->bit_rate;
 272     s->width    = avctx->width;
 273     s->height   = avctx->height;
 274     if (avctx->gop_size > 600 &&
 275         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 276         av_log(avctx, AV_LOG_ERROR,
 277                "Warning keyframe interval too large! reducing it ...\n");
 278         avctx->gop_size = 600;
 279     }
 280     s->gop_size     = avctx->gop_size;
 281     s->avctx        = avctx;
 282     s->flags        = avctx->flags;
 283     s->flags2       = avctx->flags2;
 284     if (avctx->max_b_frames > MAX_B_FRAMES) {
 285         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 286                "is %d.\n", MAX_B_FRAMES);
 287     }
 288     s->max_b_frames = avctx->max_b_frames;
 289     s->codec_id     = avctx->codec->id;
 290     s->strict_std_compliance = avctx->strict_std_compliance;
 291     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 292     s->mpeg_quant         = avctx->mpeg_quant;
 293     s->rtp_mode           = !!avctx->rtp_payload_size;
 294     s->intra_dc_precision = avctx->intra_dc_precision;
 295     s->user_specified_pts = AV_NOPTS_VALUE;
 296
 297     if (s->gop_size <= 1) {
 298         s->intra_only = 1;
 299         s->gop_size   = 12;
 300     } else {
 301         s->intra_only = 0;
 302     }
 303
 304     s->me_method = avctx->me_method;
 305
 306     /* Fixed QSCALE */
 307     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 308
 309     s->adaptive_quant = (s->avctx->lumi_masking ||
 310                          s->avctx->dark_masking ||
 311                          s->avctx->temporal_cplx_masking ||
 312                          s->avctx->spatial_cplx_masking  ||
 313                          s->avctx->p_masking      ||
 314                          s->avctx->border_masking ||
 315                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 316                         !s->fixed_qscale;
 317
 318     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
 319
 320     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 321         av_log(avctx, AV_LOG_ERROR,
 322                "a vbv buffer size is needed, "
 323                "for encoding with a maximum bitrate\n");
 324         return -1;
 325     }
 326
 327     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 328         av_log(avctx, AV_LOG_INFO,
 329                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 330     }
 331
 332     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 333         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 334         return -1;
 335     }
 336
 337     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 338         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
 339         return -1;
 340     }
 341
 342     if (avctx->rc_max_rate &&
 343         avctx->rc_max_rate == avctx->bit_rate &&
 344         avctx->rc_max_rate != avctx->rc_min_rate) {
 345         av_log(avctx, AV_LOG_INFO,
 346                "impossible bitrate constraints, this will fail\n");
 347     }
 348
 349     if (avctx->rc_buffer_size &&
 350         avctx->bit_rate * (int64_t)avctx->time_base.num >
 351             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 352         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 353         return -1;
 354     }
 355
 356     if (!s->fixed_qscale &&
 357         avctx->bit_rate * av_q2d(avctx->time_base) >
 358             avctx->bit_rate_tolerance) {
 359         av_log(avctx, AV_LOG_ERROR,
 360                "bitrate tolerance too small for bitrate\n");
 361         return -1;
 362     }
 363
 364     if (s->avctx->rc_max_rate &&
 365         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 366         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 367          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 368         90000LL * (avctx->rc_buffer_size - 1) >
 369             s->avctx->rc_max_rate * 0xFFFFLL) {
 370         av_log(avctx, AV_LOG_INFO,
 371                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 372                "specified vbv buffer is too large for the given bitrate!\n");
 373     }
 374
 375     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
 376         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 377         s->codec_id != AV_CODEC_ID_FLV1) {
 378         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 379         return -1;
 380     }
 381
 382     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 383         av_log(avctx, AV_LOG_ERROR,
 384                "OBMC is only supported with simple mb decision\n");
 385         return -1;
 386     }
 387
 388     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 389         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 390         return -1;
 391     }
 392
 393     if (s->max_b_frames                    &&
 394         s->codec_id != AV_CODEC_ID_MPEG4      &&
 395         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 396         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 397         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 398         return -1;
 399     }
 400
 401     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 402          s->codec_id == AV_CODEC_ID_H263  ||
 403          s->codec_id == AV_CODEC_ID_H263P) &&
 404         (avctx->sample_aspect_ratio.num > 255 ||
 405          avctx->sample_aspect_ratio.den > 255)) {
 406         av_log(avctx, AV_LOG_ERROR,
 407                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
 408                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 409         return -1;
 410     }
 411
 412     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 413         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 414         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 415         return -1;
 416     }
 417
 418     // FIXME mpeg2 uses that too
 419     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
 420         av_log(avctx, AV_LOG_ERROR,
 421                "mpeg2 style quantization not supported by codec\n");
 422         return -1;
 423     }
 424
 425     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 426         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 427         return -1;
 428     }
 429
 430     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 431         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 432         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 433         return -1;
 434     }
 435
 436     if (s->avctx->scenechange_threshold < 1000000000 &&
 437         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
 438         av_log(avctx, AV_LOG_ERROR,
 439                "closed gop with scene change detection are not supported yet, "
 440                "set threshold to 1000000000\n");
 441         return -1;
 442     }
 443
 444     if (s->flags & CODEC_FLAG_LOW_DELAY) {
 445         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 446             av_log(avctx, AV_LOG_ERROR,
 447                   "low delay forcing is only available for mpeg2\n");
 448             return -1;
 449         }
 450         if (s->max_b_frames != 0) {
 451             av_log(avctx, AV_LOG_ERROR,
 452                    "b frames cannot be used with low delay\n");
 453             return -1;
 454         }
 455     }
 456
 457     if (s->q_scale_type == 1) {
 458         if (avctx->qmax > 12) {
 459             av_log(avctx, AV_LOG_ERROR,
 460                    "non linear quant only supports qmax <= 12 currently\n");
 461             return -1;
 462         }
 463     }
 464
 465     if (s->avctx->thread_count > 1         &&
 466         s->codec_id != AV_CODEC_ID_MPEG4      &&
 467         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 468         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 469         (s->codec_id != AV_CODEC_ID_H263P)) {
 470         av_log(avctx, AV_LOG_ERROR,
 471                "multi threaded encoding not supported by codec\n");
 472         return -1;
 473     }
 474
 475     if (s->avctx->thread_count < 1) {
 476         av_log(avctx, AV_LOG_ERROR,
 477                "automatic thread number detection not supported by codec,"
 478                "patch welcome\n");
 479         return -1;
 480     }
 481
 482     if (s->avctx->thread_count > 1)
 483         s->rtp_mode = 1;
 484
 485     if (!avctx->time_base.den || !avctx->time_base.num) {
 486         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 487         return -1;
 488     }
 489
 490     i = (INT_MAX / 2 + 128) >> 8;
 491     if (avctx->mb_threshold >= i) {
 492         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
 493                i - 1);
 494         return -1;
 495     }
 496
 497     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 498         av_log(avctx, AV_LOG_INFO,
 499                "notice: b_frame_strategy only affects the first pass\n");
 500         avctx->b_frame_strategy = 0;
 501     }
 502
 503     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 504     if (i > 1) {
 505         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 506         avctx->time_base.den /= i;
 507         avctx->time_base.num /= i;
 508         //return -1;
 509     }
 510
 511     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 512         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
 513         // (a + x * 3 / 8) / x
 514         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 515         s->inter_quant_bias = 0;
 516     } else {
 517         s->intra_quant_bias = 0;
 518         // (a - x / 4) / x
 519         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 520     }
 521
 522     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 523         s->intra_quant_bias = avctx->intra_quant_bias;
 524     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 525         s->inter_quant_bias = avctx->inter_quant_bias;
 526
 527     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 528         s->avctx->time_base.den > (1 << 16) - 1) {
 529         av_log(avctx, AV_LOG_ERROR,
 530                "timebase %d/%d not supported by MPEG 4 standard, "
 531                "the maximum admitted value for the timebase denominator "
 532                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 533                (1 << 16) - 1);
 534         return -1;
 535     }
 536     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 537
 538     switch (avctx->codec->id) {
 539     case AV_CODEC_ID_MPEG1VIDEO:
 540         s->out_format = FMT_MPEG1;
 541         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 542         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 543         break;
 544     case AV_CODEC_ID_MPEG2VIDEO:
 545         s->out_format = FMT_MPEG1;
 546         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 547         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 548         s->rtp_mode   = 1;
 549         break;
 550     case AV_CODEC_ID_MJPEG:
 551         s->out_format = FMT_MJPEG;
 552         s->intra_only = 1; /* force intra only for jpeg */
 553         if (!CONFIG_MJPEG_ENCODER ||
 554             ff_mjpeg_encode_init(s) < 0)
 555             return -1;
 556         avctx->delay = 0;
 557         s->low_delay = 1;
 558         break;
 559     case AV_CODEC_ID_H261:
 560         if (!CONFIG_H261_ENCODER)
 561             return -1;
 562         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 563             av_log(avctx, AV_LOG_ERROR,
 564                    "The specified picture size of %dx%d is not valid for the "
 565                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 566                     s->width, s->height);
 567             return -1;
 568         }
 569         s->out_format = FMT_H261;
 570         avctx->delay  = 0;
 571         s->low_delay  = 1;
 572         break;
 573     case AV_CODEC_ID_H263:
 574         if (!CONFIG_H263_ENCODER)
 575         return -1;
 576         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 577                              s->width, s->height) == 8) {
 578             av_log(avctx, AV_LOG_INFO,
 579                    "The specified picture size of %dx%d is not valid for "
 580                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 581                    "352x288, 704x576, and 1408x1152."
 582                    "Try H.263+.\n", s->width, s->height);
 583             return -1;
 584         }
 585         s->out_format = FMT_H263;
 586         avctx->delay  = 0;
 587         s->low_delay  = 1;
 588         break;
 589     case AV_CODEC_ID_H263P:
 590         s->out_format = FMT_H263;
 591         s->h263_plus  = 1;
 592         /* Fx */
 593         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 594         s->modified_quant  = s->h263_aic;
 595         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 596         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 597
 598         /* /Fx */
 599         /* These are just to be sure */
 600         avctx->delay = 0;
 601         s->low_delay = 1;
 602         break;
 603     case AV_CODEC_ID_FLV1:
 604         s->out_format      = FMT_H263;
 605         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 606         s->unrestricted_mv = 1;
 607         s->rtp_mode  = 0; /* don't allow GOB */
 608         avctx->delay = 0;
 609         s->low_delay = 1;
 610         break;
 611     case AV_CODEC_ID_RV10:
 612         s->out_format = FMT_H263;
 613         avctx->delay  = 0;
 614         s->low_delay  = 1;
 615         break;
 616     case AV_CODEC_ID_RV20:
 617         s->out_format      = FMT_H263;
 618         avctx->delay       = 0;
 619         s->low_delay       = 1;
 620         s->modified_quant  = 1;
 621         s->h263_aic        = 1;
 622         s->h263_plus       = 1;
 623         s->loop_filter     = 1;
 624         s->unrestricted_mv = 0;
 625         break;
 626     case AV_CODEC_ID_MPEG4:
 627         s->out_format      = FMT_H263;
 628         s->h263_pred       = 1;
 629         s->unrestricted_mv = 1;
 630         s->low_delay       = s->max_b_frames ? 0 : 1;
 631         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 632         break;
 633     case AV_CODEC_ID_MSMPEG4V2:
 634         s->out_format      = FMT_H263;
 635         s->h263_pred       = 1;
 636         s->unrestricted_mv = 1;
 637         s->msmpeg4_version = 2;
 638         avctx->delay       = 0;
 639         s->low_delay       = 1;
 640         break;
 641     case AV_CODEC_ID_MSMPEG4V3:
 642         s->out_format        = FMT_H263;
 643         s->h263_pred         = 1;
 644         s->unrestricted_mv   = 1;
 645         s->msmpeg4_version   = 3;
 646         s->flipflop_rounding = 1;
 647         avctx->delay         = 0;
 648         s->low_delay         = 1;
 649         break;
 650     case AV_CODEC_ID_WMV1:
 651         s->out_format        = FMT_H263;
 652         s->h263_pred         = 1;
 653         s->unrestricted_mv   = 1;
 654         s->msmpeg4_version   = 4;
 655         s->flipflop_rounding = 1;
 656         avctx->delay         = 0;
 657         s->low_delay         = 1;
 658         break;
 659     case AV_CODEC_ID_WMV2:
 660         s->out_format        = FMT_H263;
 661         s->h263_pred         = 1;
 662         s->unrestricted_mv   = 1;
 663         s->msmpeg4_version   = 5;
 664         s->flipflop_rounding = 1;
 665         avctx->delay         = 0;
 666         s->low_delay         = 1;
 667         break;
 668     default:
 669         return -1;
 670     }
 671
 672     avctx->has_b_frames = !s->low_delay;
 673
 674     s->encoding = 1;
 675
 676     s->progressive_frame    =
 677     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 678                                                 CODEC_FLAG_INTERLACED_ME) ||
 679                                 s->alternate_scan);
 680
 681     /* init */
 682     if (ff_MPV_common_init(s) < 0)
 683         return -1;
 684
 685     if (ARCH_X86)
 686         ff_MPV_encode_init_x86(s);
 687
 688     s->avctx->coded_frame = &s->current_picture.f;
 689
 690     if (s->msmpeg4_version) {
 691         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 692                           2 * 2 * (MAX_LEVEL + 1) *
 693                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 694     }
 695     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 696
 697     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 698     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 699     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 700     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 701     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 702                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 703     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 704                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 705
 706     if (s->avctx->noise_reduction) {
 707         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 708                           2 * 64 * sizeof(uint16_t), fail);
 709     }
 710
 711     ff_h263dsp_init(&s->h263dsp);
 712     if (!s->dct_quantize)
 713         s->dct_quantize = ff_dct_quantize_c;
 714     if (!s->denoise_dct)
 715         s->denoise_dct  = denoise_dct_c;
 716     s->fast_dct_quantize = s->dct_quantize;
 717     if (avctx->trellis)
 718         s->dct_quantize  = dct_quantize_trellis_c;
 719
 720     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 721         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 722
 723     s->quant_precision = 5;
 724
 725     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
 726     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
 727
 728     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 729         ff_h261_encode_init(s);
 730     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 731         ff_h263_encode_init(s);
 732     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 733         ff_msmpeg4_encode_init(s);
 734     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 735         && s->out_format == FMT_MPEG1)
 736         ff_mpeg1_encode_init(s);
 737
 738     /* init q matrix */
 739     for (i = 0; i < 64; i++) {
 740         int j = s->dsp.idct_permutation[i];
 741         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 742             s->mpeg_quant) {
 743             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 744             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 745         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 746             s->intra_matrix[j] =
 747             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 748         } else {
 749             /* mpeg1/2 */
 750             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 751             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 752         }
 753         if (s->avctx->intra_matrix)
 754             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 755         if (s->avctx->inter_matrix)
 756             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 757     }
 758
 759     /* precompute matrix */
 760     /* for mjpeg, we do include qscale in the matrix */
 761     if (s->out_format != FMT_MJPEG) {
 762         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
 763                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 764                           31, 1);
 765         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
 766                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 767                           31, 0);
 768     }
 769
 770     if (ff_rate_control_init(s) < 0)
 771         return -1;
 772
 773 #if FF_API_ERROR_RATE
 774     FF_DISABLE_DEPRECATION_WARNINGS
 775     if (avctx->error_rate)
 776         s->error_rate = avctx->error_rate;
 777     FF_ENABLE_DEPRECATION_WARNINGS;
 778 #endif
 779
 780     if (avctx->b_frame_strategy == 2) {
 781         for (i = 0; i < s->max_b_frames + 2; i++) {
 782             s->tmp_frames[i] = av_frame_alloc();
 783             if (!s->tmp_frames[i])
 784                 return AVERROR(ENOMEM);
 785
 786             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 787             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
 788             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
 789
 790             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 791             if (ret < 0)
 792                 return ret;
 793         }
 794     }
 795
 796     return 0;
 797 fail:
 798     ff_MPV_encode_end(avctx);
 799     return AVERROR_UNKNOWN;
 800 }
 801
 802 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
 803 {
 804     MpegEncContext *s = avctx->priv_data;
 805     int i;
 806
 807     ff_rate_control_uninit(s);
 808
 809     ff_MPV_common_end(s);
 810     if (CONFIG_MJPEG_ENCODER &&
 811         s->out_format == FMT_MJPEG)
 812         ff_mjpeg_encode_close(s);
 813
 814     av_freep(&avctx->extradata);
 815
 816     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
 817         av_frame_free(&s->tmp_frames[i]);
 818
 819     ff_free_picture_tables(&s->new_picture);
 820     ff_mpeg_unref_picture(s, &s->new_picture);
 821
 822     av_freep(&s->avctx->stats_out);
 823     av_freep(&s->ac_stats);
 824
 825     av_freep(&s->q_intra_matrix);
 826     av_freep(&s->q_inter_matrix);
 827     av_freep(&s->q_intra_matrix16);
 828     av_freep(&s->q_inter_matrix16);
 829     av_freep(&s->input_picture);
 830     av_freep(&s->reordered_input_picture);
 831     av_freep(&s->dct_offset);
 832
 833     return 0;
 834 }
 835
 836 static int get_sae(uint8_t *src, int ref, int stride)
 837 {
 838     int x,y;
 839     int acc = 0;
 840
 841     for (y = 0; y < 16; y++) {
 842         for (x = 0; x < 16; x++) {
 843             acc += FFABS(src[x + y * stride] - ref);
 844         }
 845     }
 846
 847     return acc;
 848 }
 849
 850 static int get_intra_count(MpegEncContext *s, uint8_t *src,
 851                            uint8_t *ref, int stride)
 852 {
 853     int x, y, w, h;
 854     int acc = 0;
 855
 856     w = s->width  & ~15;
 857     h = s->height & ~15;
 858
 859     for (y = 0; y < h; y += 16) {
 860         for (x = 0; x < w; x += 16) {
 861             int offset = x + y * stride;
 862             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
 863                                      16);
 864             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
 865             int sae  = get_sae(src + offset, mean, stride);
 866
 867             acc += sae + 500 < sad;
 868         }
 869     }
 870     return acc;
 871 }
 872
 873
 874 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
 875 {
 876     Picture *pic = NULL;
 877     int64_t pts;
 878     int i, display_picture_number = 0, ret;
 879     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
 880                                                  (s->low_delay ? 0 : 1);
 881     int direct = 1;
 882
 883     if (pic_arg) {
 884         pts = pic_arg->pts;
 885         display_picture_number = s->input_picture_number++;
 886
 887         if (pts != AV_NOPTS_VALUE) {
 888             if (s->user_specified_pts != AV_NOPTS_VALUE) {
 889                 int64_t time = pts;
 890                 int64_t last = s->user_specified_pts;
 891
 892                 if (time <= last) {
 893                     av_log(s->avctx, AV_LOG_ERROR,
 894                            "Error, Invalid timestamp=%"PRId64", "
 895                            "last=%"PRId64"\n", pts, s->user_specified_pts);
 896                     return -1;
 897                 }
 898
 899                 if (!s->low_delay && display_picture_number == 1)
 900                     s->dts_delta = time - last;
 901             }
 902             s->user_specified_pts = pts;
 903         } else {
 904             if (s->user_specified_pts != AV_NOPTS_VALUE) {
 905                 s->user_specified_pts =
 906                 pts = s->user_specified_pts + 1;
 907                 av_log(s->avctx, AV_LOG_INFO,
 908                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
 909                        pts);
 910             } else {
 911                 pts = display_picture_number;
 912             }
 913         }
 914     }
 915
 916     if (pic_arg) {
 917         if (!pic_arg->buf[0]);
 918             direct = 0;
 919         if (pic_arg->linesize[0] != s->linesize)
 920             direct = 0;
 921         if (pic_arg->linesize[1] != s->uvlinesize)
 922             direct = 0;
 923         if (pic_arg->linesize[2] != s->uvlinesize)
 924             direct = 0;
 925
 926         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
 927                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
 928
 929         if (direct) {
 930             i = ff_find_unused_picture(s, 1);
 931             if (i < 0)
 932                 return i;
 933
 934             pic = &s->picture[i];
 935             pic->reference = 3;
 936
 937             if ((ret = av_frame_ref(&pic->f, pic_arg)) < 0)
 938                 return ret;
 939             if (ff_alloc_picture(s, pic, 1) < 0) {
 940                 return -1;
 941             }
 942         } else {
 943             i = ff_find_unused_picture(s, 0);
 944             if (i < 0)
 945                 return i;
 946
 947             pic = &s->picture[i];
 948             pic->reference = 3;
 949
 950             if (ff_alloc_picture(s, pic, 0) < 0) {
 951                 return -1;
 952             }
 953
 954             if (pic->f.data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
 955                 pic->f.data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
 956                 pic->f.data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
 957                 // empty
 958             } else {
 959                 int h_chroma_shift, v_chroma_shift;
 960                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
 961                                                  &h_chroma_shift,
 962                                                  &v_chroma_shift);
 963
 964                 for (i = 0; i < 3; i++) {
 965                     int src_stride = pic_arg->linesize[i];
 966                     int dst_stride = i ? s->uvlinesize : s->linesize;
 967                     int h_shift = i ? h_chroma_shift : 0;
 968                     int v_shift = i ? v_chroma_shift : 0;
 969                     int w = s->width  >> h_shift;
 970                     int h = s->height >> v_shift;
 971                     uint8_t *src = pic_arg->data[i];
 972                     uint8_t *dst = pic->f.data[i];
 973
 974                     if (!s->avctx->rc_buffer_size)
 975                         dst += INPLACE_OFFSET;
 976
 977                     if (src_stride == dst_stride)
 978                         memcpy(dst, src, src_stride * h);
 979                     else {
 980                         while (h--) {
 981                             memcpy(dst, src, w);
 982                             dst += dst_stride;
 983                             src += src_stride;
 984                         }
 985                     }
 986                 }
 987             }
 988         }
 989         ret = av_frame_copy_props(&pic->f, pic_arg);
 990         if (ret < 0)
 991             return ret;
 992
 993         pic->f.display_picture_number = display_picture_number;
 994         pic->f.pts = pts; // we set this here to avoid modifiying pic_arg
 995     }
 996
 997     /* shift buffer entries */
 998     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
 999         s->input_picture[i - 1] = s->input_picture[i];
1000
1001     s->input_picture[encoding_delay] = (Picture*) pic;
1002
1003     return 0;
1004 }
1005
1006 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1007 {
1008     int x, y, plane;
1009     int score = 0;
1010     int64_t score64 = 0;
1011
1012     for (plane = 0; plane < 3; plane++) {
1013         const int stride = p->f.linesize[plane];
1014         const int bw = plane ? 1 : 2;
1015         for (y = 0; y < s->mb_height * bw; y++) {
1016             for (x = 0; x < s->mb_width * bw; x++) {
1017                 int off = p->shared ? 0 : 16;
1018                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1019                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1020                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1021
1022                 switch (s->avctx->frame_skip_exp) {
1023                 case 0: score    =  FFMAX(score, v);          break;
1024                 case 1: score   += FFABS(v);                  break;
1025                 case 2: score   += v * v;                     break;
1026                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1027                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1028                 }
1029             }
1030         }
1031     }
1032
1033     if (score)
1034         score64 = score;
1035
1036     if (score64 < s->avctx->frame_skip_threshold)
1037         return 1;
1038     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1039         return 1;
1040     return 0;
1041 }
1042
1043 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1044 {
1045     AVPacket pkt = { 0 };
1046     int ret, got_output;
1047
1048     av_init_packet(&pkt);
1049     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1050     if (ret < 0)
1051         return ret;
1052
1053     ret = pkt.size;
1054     av_free_packet(&pkt);
1055     return ret;
1056 }
1057
1058 static int estimate_best_b_count(MpegEncContext *s)
1059 {
1060     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1061     AVCodecContext *c = avcodec_alloc_context3(NULL);
1062     const int scale = s->avctx->brd_scale;
1063     int i, j, out_size, p_lambda, b_lambda, lambda2;
1064     int64_t best_rd  = INT64_MAX;
1065     int best_b_count = -1;
1066
1067     assert(scale >= 0 && scale <= 3);
1068
1069     //emms_c();
1070     //s->next_picture_ptr->quality;
1071     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1072     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1073     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1074     if (!b_lambda) // FIXME we should do this somewhere else
1075         b_lambda = p_lambda;
1076     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1077                FF_LAMBDA_SHIFT;
1078
1079     c->width        = s->width  >> scale;
1080     c->height       = s->height >> scale;
1081     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1082                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1083     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1084     c->mb_decision  = s->avctx->mb_decision;
1085     c->me_cmp       = s->avctx->me_cmp;
1086     c->mb_cmp       = s->avctx->mb_cmp;
1087     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1088     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1089     c->time_base    = s->avctx->time_base;
1090     c->max_b_frames = s->max_b_frames;
1091
1092     if (avcodec_open2(c, codec, NULL) < 0)
1093         return -1;
1094
1095     for (i = 0; i < s->max_b_frames + 2; i++) {
1096         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1097                                                 s->next_picture_ptr;
1098
1099         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1100             pre_input = *pre_input_ptr;
1101
1102             if (!pre_input.shared && i) {
1103                 pre_input.f.data[0] += INPLACE_OFFSET;
1104                 pre_input.f.data[1] += INPLACE_OFFSET;
1105                 pre_input.f.data[2] += INPLACE_OFFSET;
1106             }
1107
1108             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1109                                  pre_input.f.data[0], pre_input.f.linesize[0],
1110                                  c->width,      c->height);
1111             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1112                                  pre_input.f.data[1], pre_input.f.linesize[1],
1113                                  c->width >> 1, c->height >> 1);
1114             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1115                                  pre_input.f.data[2], pre_input.f.linesize[2],
1116                                  c->width >> 1, c->height >> 1);
1117         }
1118     }
1119
1120     for (j = 0; j < s->max_b_frames + 1; j++) {
1121         int64_t rd = 0;
1122
1123         if (!s->input_picture[j])
1124             break;
1125
1126         c->error[0] = c->error[1] = c->error[2] = 0;
1127
1128         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1129         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1130
1131         out_size = encode_frame(c, s->tmp_frames[0]);
1132
1133         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1134
1135         for (i = 0; i < s->max_b_frames + 1; i++) {
1136             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1137
1138             s->tmp_frames[i + 1]->pict_type = is_p ?
1139                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1140             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1141
1142             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1143
1144             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1145         }
1146
1147         /* get the delayed frames */
1148         while (out_size) {
1149             out_size = encode_frame(c, NULL);
1150             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1151         }
1152
1153         rd += c->error[0] + c->error[1] + c->error[2];
1154
1155         if (rd < best_rd) {
1156             best_rd = rd;
1157             best_b_count = j;
1158         }
1159     }
1160
1161     avcodec_close(c);
1162     av_freep(&c);
1163
1164     return best_b_count;
1165 }
1166
1167 static int select_input_picture(MpegEncContext *s)
1168 {
1169     int i, ret;
1170
1171     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1172         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1173     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1174
1175     /* set next picture type & ordering */
1176     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1177         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1178             s->next_picture_ptr == NULL || s->intra_only) {
1179             s->reordered_input_picture[0] = s->input_picture[0];
1180             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1181             s->reordered_input_picture[0]->f.coded_picture_number =
1182                 s->coded_picture_number++;
1183         } else {
1184             int b_frames;
1185
1186             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1187                 if (s->picture_in_gop_number < s->gop_size &&
1188                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1189                     // FIXME check that te gop check above is +-1 correct
1190                     av_frame_unref(&s->input_picture[0]->f);
1191
1192                     emms_c();
1193                     ff_vbv_update(s, 0);
1194
1195                     goto no_output_pic;
1196                 }
1197             }
1198
1199             if (s->flags & CODEC_FLAG_PASS2) {
1200                 for (i = 0; i < s->max_b_frames + 1; i++) {
1201                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1202
1203                     if (pict_num >= s->rc_context.num_entries)
1204                         break;
1205                     if (!s->input_picture[i]) {
1206                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1207                         break;
1208                     }
1209
1210                     s->input_picture[i]->f.pict_type =
1211                         s->rc_context.entry[pict_num].new_pict_type;
1212                 }
1213             }
1214
1215             if (s->avctx->b_frame_strategy == 0) {
1216                 b_frames = s->max_b_frames;
1217                 while (b_frames && !s->input_picture[b_frames])
1218                     b_frames--;
1219             } else if (s->avctx->b_frame_strategy == 1) {
1220                 for (i = 1; i < s->max_b_frames + 1; i++) {
1221                     if (s->input_picture[i] &&
1222                         s->input_picture[i]->b_frame_score == 0) {
1223                         s->input_picture[i]->b_frame_score =
1224                             get_intra_count(s,
1225                                             s->input_picture[i    ]->f.data[0],
1226                                             s->input_picture[i - 1]->f.data[0],
1227                                             s->linesize) + 1;
1228                     }
1229                 }
1230                 for (i = 0; i < s->max_b_frames + 1; i++) {
1231                     if (s->input_picture[i] == NULL ||
1232                         s->input_picture[i]->b_frame_score - 1 >
1233                             s->mb_num / s->avctx->b_sensitivity)
1234                         break;
1235                 }
1236
1237                 b_frames = FFMAX(0, i - 1);
1238
1239                 /* reset scores */
1240                 for (i = 0; i < b_frames + 1; i++) {
1241                     s->input_picture[i]->b_frame_score = 0;
1242                 }
1243             } else if (s->avctx->b_frame_strategy == 2) {
1244                 b_frames = estimate_best_b_count(s);
1245             } else {
1246                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1247                 b_frames = 0;
1248             }
1249
1250             emms_c();
1251
1252             for (i = b_frames - 1; i >= 0; i--) {
1253                 int type = s->input_picture[i]->f.pict_type;
1254                 if (type && type != AV_PICTURE_TYPE_B)
1255                     b_frames = i;
1256             }
1257             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1258                 b_frames == s->max_b_frames) {
1259                 av_log(s->avctx, AV_LOG_ERROR,
1260                        "warning, too many b frames in a row\n");
1261             }
1262
1263             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1264                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1265                     s->gop_size > s->picture_in_gop_number) {
1266                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1267                 } else {
1268                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1269                         b_frames = 0;
1270                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1271                 }
1272             }
1273
1274             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1275                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1276                 b_frames--;
1277
1278             s->reordered_input_picture[0] = s->input_picture[b_frames];
1279             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1280                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1281             s->reordered_input_picture[0]->f.coded_picture_number =
1282                 s->coded_picture_number++;
1283             for (i = 0; i < b_frames; i++) {
1284                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1285                 s->reordered_input_picture[i + 1]->f.pict_type =
1286                     AV_PICTURE_TYPE_B;
1287                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1288                     s->coded_picture_number++;
1289             }
1290         }
1291     }
1292 no_output_pic:
1293     if (s->reordered_input_picture[0]) {
1294         s->reordered_input_picture[0]->reference =
1295            s->reordered_input_picture[0]->f.pict_type !=
1296                AV_PICTURE_TYPE_B ? 3 : 0;
1297
1298         ff_mpeg_unref_picture(s, &s->new_picture);
1299         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1300             return ret;
1301
1302         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1303             // input is a shared pix, so we can't modifiy it -> alloc a new
1304             // one & ensure that the shared one is reuseable
1305
1306             Picture *pic;
1307             int i = ff_find_unused_picture(s, 0);
1308             if (i < 0)
1309                 return i;
1310             pic = &s->picture[i];
1311
1312             pic->reference = s->reordered_input_picture[0]->reference;
1313             if (ff_alloc_picture(s, pic, 0) < 0) {
1314                 return -1;
1315             }
1316
1317             ret = av_frame_copy_props(&pic->f, &s->reordered_input_picture[0]->f);
1318             if (ret < 0)
1319                 return ret;
1320
1321             /* mark us unused / free shared pic */
1322             av_frame_unref(&s->reordered_input_picture[0]->f);
1323             s->reordered_input_picture[0]->shared = 0;
1324
1325             s->current_picture_ptr = pic;
1326         } else {
1327             // input is not a shared pix -> reuse buffer for current_pix
1328             s->current_picture_ptr = s->reordered_input_picture[0];
1329             for (i = 0; i < 4; i++) {
1330                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1331             }
1332         }
1333         ff_mpeg_unref_picture(s, &s->current_picture);
1334         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1335                                        s->current_picture_ptr)) < 0)
1336             return ret;
1337
1338         s->picture_number = s->new_picture.f.display_picture_number;
1339     } else {
1340         ff_mpeg_unref_picture(s, &s->new_picture);
1341     }
1342     return 0;
1343 }
1344
1345 static void frame_end(MpegEncContext *s)
1346 {
1347     int i;
1348
1349     if (s->unrestricted_mv &&
1350         s->current_picture.reference &&
1351         !s->intra_only) {
1352         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1353         int hshift = desc->log2_chroma_w;
1354         int vshift = desc->log2_chroma_h;
1355         s->dsp.draw_edges(s->current_picture.f.data[0], s->linesize,
1356                           s->h_edge_pos, s->v_edge_pos,
1357                           EDGE_WIDTH, EDGE_WIDTH,
1358                           EDGE_TOP | EDGE_BOTTOM);
1359         s->dsp.draw_edges(s->current_picture.f.data[1], s->uvlinesize,
1360                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1361                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1362                           EDGE_TOP | EDGE_BOTTOM);
1363         s->dsp.draw_edges(s->current_picture.f.data[2], s->uvlinesize,
1364                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1365                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1366                           EDGE_TOP | EDGE_BOTTOM);
1367     }
1368
1369     emms_c();
1370
1371     s->last_pict_type                 = s->pict_type;
1372     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f.quality;
1373     if (s->pict_type!= AV_PICTURE_TYPE_B)
1374         s->last_non_b_pict_type = s->pict_type;
1375
1376     if (s->encoding) {
1377         /* release non-reference frames */
1378         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1379             if (!s->picture[i].reference)
1380                 ff_mpeg_unref_picture(s, &s->picture[i]);
1381         }
1382     }
1383
1384     s->avctx->coded_frame = &s->current_picture_ptr->f;
1385
1386 }
1387
1388 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1389                           const AVFrame *pic_arg, int *got_packet)
1390 {
1391     MpegEncContext *s = avctx->priv_data;
1392     int i, stuffing_count, ret;
1393     int context_count = s->slice_context_count;
1394
1395     s->picture_in_gop_number++;
1396
1397     if (load_input_picture(s, pic_arg) < 0)
1398         return -1;
1399
1400     if (select_input_picture(s) < 0) {
1401         return -1;
1402     }
1403
1404     /* output? */
1405     if (s->new_picture.f.data[0]) {
1406         if (!pkt->data &&
1407             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1408             return ret;
1409         if (s->mb_info) {
1410             s->mb_info_ptr = av_packet_new_side_data(pkt,
1411                                  AV_PKT_DATA_H263_MB_INFO,
1412                                  s->mb_width*s->mb_height*12);
1413             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1414         }
1415
1416         for (i = 0; i < context_count; i++) {
1417             int start_y = s->thread_context[i]->start_mb_y;
1418             int   end_y = s->thread_context[i]->  end_mb_y;
1419             int h       = s->mb_height;
1420             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1421             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1422
1423             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1424         }
1425
1426         s->pict_type = s->new_picture.f.pict_type;
1427         //emms_c();
1428         ff_MPV_frame_start(s, avctx);
1429 vbv_retry:
1430         if (encode_picture(s, s->picture_number) < 0)
1431             return -1;
1432
1433         avctx->header_bits = s->header_bits;
1434         avctx->mv_bits     = s->mv_bits;
1435         avctx->misc_bits   = s->misc_bits;
1436         avctx->i_tex_bits  = s->i_tex_bits;
1437         avctx->p_tex_bits  = s->p_tex_bits;
1438         avctx->i_count     = s->i_count;
1439         // FIXME f/b_count in avctx
1440         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1441         avctx->skip_count  = s->skip_count;
1442
1443         frame_end(s);
1444
1445         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1446             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1447
1448         if (avctx->rc_buffer_size) {
1449             RateControlContext *rcc = &s->rc_context;
1450             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1451
1452             if (put_bits_count(&s->pb) > max_size &&
1453                 s->lambda < s->avctx->lmax) {
1454                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1455                                        (s->qscale + 1) / s->qscale);
1456                 if (s->adaptive_quant) {
1457                     int i;
1458                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1459                         s->lambda_table[i] =
1460                             FFMAX(s->lambda_table[i] + 1,
1461                                   s->lambda_table[i] * (s->qscale + 1) /
1462                                   s->qscale);
1463                 }
1464                 s->mb_skipped = 0;        // done in MPV_frame_start()
1465                 // done in encode_picture() so we must undo it
1466                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1467                     if (s->flipflop_rounding          ||
1468                         s->codec_id == AV_CODEC_ID_H263P ||
1469                         s->codec_id == AV_CODEC_ID_MPEG4)
1470                         s->no_rounding ^= 1;
1471                 }
1472                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1473                     s->time_base       = s->last_time_base;
1474                     s->last_non_b_time = s->time - s->pp_time;
1475                 }
1476                 for (i = 0; i < context_count; i++) {
1477                     PutBitContext *pb = &s->thread_context[i]->pb;
1478                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1479                 }
1480                 goto vbv_retry;
1481             }
1482
1483             assert(s->avctx->rc_max_rate);
1484         }
1485
1486         if (s->flags & CODEC_FLAG_PASS1)
1487             ff_write_pass1_stats(s);
1488
1489         for (i = 0; i < 4; i++) {
1490             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1491             avctx->error[i] += s->current_picture_ptr->f.error[i];
1492         }
1493
1494         if (s->flags & CODEC_FLAG_PASS1)
1495             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1496                    avctx->i_tex_bits + avctx->p_tex_bits ==
1497                        put_bits_count(&s->pb));
1498         flush_put_bits(&s->pb);
1499         s->frame_bits  = put_bits_count(&s->pb);
1500
1501         stuffing_count = ff_vbv_update(s, s->frame_bits);
1502         if (stuffing_count) {
1503             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1504                     stuffing_count + 50) {
1505                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1506                 return -1;
1507             }
1508
1509             switch (s->codec_id) {
1510             case AV_CODEC_ID_MPEG1VIDEO:
1511             case AV_CODEC_ID_MPEG2VIDEO:
1512                 while (stuffing_count--) {
1513                     put_bits(&s->pb, 8, 0);
1514                 }
1515             break;
1516             case AV_CODEC_ID_MPEG4:
1517                 put_bits(&s->pb, 16, 0);
1518                 put_bits(&s->pb, 16, 0x1C3);
1519                 stuffing_count -= 4;
1520                 while (stuffing_count--) {
1521                     put_bits(&s->pb, 8, 0xFF);
1522                 }
1523             break;
1524             default:
1525                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1526             }
1527             flush_put_bits(&s->pb);
1528             s->frame_bits  = put_bits_count(&s->pb);
1529         }
1530
1531         /* update mpeg1/2 vbv_delay for CBR */
1532         if (s->avctx->rc_max_rate                          &&
1533             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1534             s->out_format == FMT_MPEG1                     &&
1535             90000LL * (avctx->rc_buffer_size - 1) <=
1536                 s->avctx->rc_max_rate * 0xFFFFLL) {
1537             int vbv_delay, min_delay;
1538             double inbits  = s->avctx->rc_max_rate *
1539                              av_q2d(s->avctx->time_base);
1540             int    minbits = s->frame_bits - 8 *
1541                              (s->vbv_delay_ptr - s->pb.buf - 1);
1542             double bits    = s->rc_context.buffer_index + minbits - inbits;
1543
1544             if (bits < 0)
1545                 av_log(s->avctx, AV_LOG_ERROR,
1546                        "Internal error, negative bits\n");
1547
1548             assert(s->repeat_first_field == 0);
1549
1550             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1551             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1552                         s->avctx->rc_max_rate;
1553
1554             vbv_delay = FFMAX(vbv_delay, min_delay);
1555
1556             assert(vbv_delay < 0xFFFF);
1557
1558             s->vbv_delay_ptr[0] &= 0xF8;
1559             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1560             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1561             s->vbv_delay_ptr[2] &= 0x07;
1562             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1563             avctx->vbv_delay     = vbv_delay * 300;
1564         }
1565         s->total_bits     += s->frame_bits;
1566         avctx->frame_bits  = s->frame_bits;
1567
1568         pkt->pts = s->current_picture.f.pts;
1569         if (!s->low_delay) {
1570             if (!s->current_picture.f.coded_picture_number)
1571                 pkt->dts = pkt->pts - s->dts_delta;
1572             else
1573                 pkt->dts = s->reordered_pts;
1574             s->reordered_pts = s->input_picture[0]->f.pts;
1575         } else
1576             pkt->dts = pkt->pts;
1577         if (s->current_picture.f.key_frame)
1578             pkt->flags |= AV_PKT_FLAG_KEY;
1579         if (s->mb_info)
1580             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1581     } else {
1582         s->frame_bits = 0;
1583     }
1584     assert((s->frame_bits & 7) == 0);
1585
1586     pkt->size = s->frame_bits / 8;
1587     *got_packet = !!pkt->size;
1588     return 0;
1589 }
1590
1591 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1592                                                 int n, int threshold)
1593 {
1594     static const char tab[64] = {
1595         3, 2, 2, 1, 1, 1, 1, 1,
1596         1, 1, 1, 1, 1, 1, 1, 1,
1597         1, 1, 1, 1, 1, 1, 1, 1,
1598         0, 0, 0, 0, 0, 0, 0, 0,
1599         0, 0, 0, 0, 0, 0, 0, 0,
1600         0, 0, 0, 0, 0, 0, 0, 0,
1601         0, 0, 0, 0, 0, 0, 0, 0,
1602         0, 0, 0, 0, 0, 0, 0, 0
1603     };
1604     int score = 0;
1605     int run = 0;
1606     int i;
1607     int16_t *block = s->block[n];
1608     const int last_index = s->block_last_index[n];
1609     int skip_dc;
1610
1611     if (threshold < 0) {
1612         skip_dc = 0;
1613         threshold = -threshold;
1614     } else
1615         skip_dc = 1;
1616
1617     /* Are all we could set to zero already zero? */
1618     if (last_index <= skip_dc - 1)
1619         return;
1620
1621     for (i = 0; i <= last_index; i++) {
1622         const int j = s->intra_scantable.permutated[i];
1623         const int level = FFABS(block[j]);
1624         if (level == 1) {
1625             if (skip_dc && i == 0)
1626                 continue;
1627             score += tab[run];
1628             run = 0;
1629         } else if (level > 1) {
1630             return;
1631         } else {
1632             run++;
1633         }
1634     }
1635     if (score >= threshold)
1636         return;
1637     for (i = skip_dc; i <= last_index; i++) {
1638         const int j = s->intra_scantable.permutated[i];
1639         block[j] = 0;
1640     }
1641     if (block[0])
1642         s->block_last_index[n] = 0;
1643     else
1644         s->block_last_index[n] = -1;
1645 }
1646
1647 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1648                                int last_index)
1649 {
1650     int i;
1651     const int maxlevel = s->max_qcoeff;
1652     const int minlevel = s->min_qcoeff;
1653     int overflow = 0;
1654
1655     if (s->mb_intra) {
1656         i = 1; // skip clipping of intra dc
1657     } else
1658         i = 0;
1659
1660     for (; i <= last_index; i++) {
1661         const int j = s->intra_scantable.permutated[i];
1662         int level = block[j];
1663
1664         if (level > maxlevel) {
1665             level = maxlevel;
1666             overflow++;
1667         } else if (level < minlevel) {
1668             level = minlevel;
1669             overflow++;
1670         }
1671
1672         block[j] = level;
1673     }
1674
1675     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1676         av_log(s->avctx, AV_LOG_INFO,
1677                "warning, clipping %d dct coefficients to %d..%d\n",
1678                overflow, minlevel, maxlevel);
1679 }
1680
/**
 * Compute a weight for each sample of an 8x8 block from its surroundings.
 *
 * For every sample, the up to 3x3 neighbourhood (cut off at the borders of
 * the 8x8 block) is scanned; with count samples, their sum and their sum of
 * squares, the weight is 36 * ff_sqrt(count * sqr - sum * sum) / count.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left sample of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int idx;

    // FIXME optimize
    for (idx = 0; idx < 64; idx++) {
        const int col     = idx & 7;
        const int row     = idx >> 3;
        const int x_begin = FFMAX(col - 1, 0);
        const int x_end   = FFMIN(8, col + 2);
        const int y_begin = FFMAX(row - 1, 0);
        const int y_end   = FFMIN(8, row + 2);
        int total   = 0;
        int squares = 0;
        int samples = 0;
        int nx, ny;

        for (ny = y_begin; ny < y_end; ny++) {
            const uint8_t *line = ptr + ny * stride;

            for (nx = x_begin; nx < x_end; nx++) {
                const int v = line[nx];

                total   += v;
                squares += v * v;
                samples++;
            }
        }

        weight[idx] = (36 * ff_sqrt(samples * squares - total * total)) /
                      samples;
    }
}
1704
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * Steps, in order: select the quantizer, locate (and if necessary
 * edge-pad) the source pixels, load the pixels (intra) or the motion
 * compensated difference (inter) into s->block[], optionally pick field
 * based DCT, DCT + quantize every block that is not skipped, apply
 * coefficient elimination, and finally call the codec specific
 * *_encode_mb() function selected by s->codec_id.
 *
 * @param s               encoder context
 * @param motion_x        passed unchanged to the codec specific encoder
 * @param motion_y        passed unchanged to the codec specific encoder
 * @param mb_block_height chroma rows per macroblock as used for chroma
 *                        addressing (encode_mb() passes 8 for 4:2:0 and
 *                        16 otherwise)
 * @param mb_block_count  number of 8x8 blocks handled per macroblock
 *                        (encode_mb() passes 6 for 4:2:0 and 8 otherwise)
 */
1705 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1706                                                 int motion_x, int motion_y,
1707                                                 int mb_block_height,
1708                                                 int mb_block_count)
1709 {
1710     int16_t weight[8][64];
1711     int16_t orig[8][64];
1712     const int mb_x = s->mb_x;
1713     const int mb_y = s->mb_y;
1714     int i;
1715     int skip_dct[8];
1716     int dct_offset = s->linesize * 8; // default for progressive frames
1717     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1718     ptrdiff_t wrap_y, wrap_c;
1719
         /* every block starts out with the global skip-DCT setting */
1720     for (i = 0; i < mb_block_count; i++)
1721         skip_dct[i] = s->skipdct;
1722
         /* quantizer selection: with adaptive quantization the lambda of
          * this macroblock is loaded and, unless QP_RD is active, dquant
          * becomes the difference between the table qscale and the qscale
          * that was in effect on entry */
1723     if (s->adaptive_quant) {
1724         const int last_qp = s->qscale;
1725         const int mb_xy = mb_x + mb_y * s->mb_stride;
1726
1727         s->lambda = s->lambda_table[mb_xy];
1728         update_qscale(s);
1729
1730         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1731             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1732             s->dquant = s->qscale - last_qp;
1733
1734             if (s->out_format == FMT_H263) {
                     /* H.263 style output limits dquant to [-2, 2] */
1735                 s->dquant = av_clip(s->dquant, -2, 2);
1736
                     /* MPEG-4 inter macroblocks: dquant is dropped in
                      * B-frames when it is odd or the MB uses direct mode,
                      * and for any MB using 8x8 motion vectors */
1737                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1738                     if (!s->mb_intra) {
1739                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1740                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1741                                 s->dquant = 0;
1742                         }
1743                         if (s->mv_type == MV_TYPE_8X8)
1744                             s->dquant = 0;
1745                     }
1746                 }
1747             }
1748         }
1749         ff_set_qscale(s, last_qp + s->dquant);
1750     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1751         ff_set_qscale(s, s->qscale + s->dquant);
1752
         /* source pixel pointers of this macroblock in the picture that is
          * being encoded */
1753     wrap_y = s->linesize;
1754     wrap_c = s->uvlinesize;
1755     ptr_y  = s->new_picture.f.data[0] +
1756              (mb_y * 16 * wrap_y)              + mb_x * 16;
1757     ptr_cb = s->new_picture.f.data[1] +
1758              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1759     ptr_cr = s->new_picture.f.data[2] +
1760              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1761
         /* the macroblock reaches past the right or bottom picture edge:
          * build edge-extended copies in edge_emu_buffer and read the
          * source from there instead */
1762     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1763         uint8_t *ebuf = s->edge_emu_buffer + 32;
1764         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1765                                  wrap_y, wrap_y,
1766                                  16, 16, mb_x * 16, mb_y * 16,
1767                                  s->width, s->height);
1768         ptr_y = ebuf;
1769         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1770                                  wrap_c, wrap_c,
1771                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1772                                  s->width >> 1, s->height >> 1);
1773         ptr_cb = ebuf + 18 * wrap_y;
1774         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1775                                  wrap_c, wrap_c,
1776                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1777                                  s->width >> 1, s->height >> 1);
1778         ptr_cr = ebuf + 18 * wrap_y + 8;
1779     }
1780
1781     if (s->mb_intra) {
             /* intra: optionally choose between frame and field DCT by
              * comparing ildct_cmp scores; the progressive score carries
              * a bias of -400 */
1782         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1783             int progressive_score, interlaced_score;
1784
1785             s->interlaced_dct = 0;
1786             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1787                                                     NULL, wrap_y, 8) +
1788                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1789                                                     NULL, wrap_y, 8) - 400;
1790
1791             if (progressive_score > 0) {
1792                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1793                                                        NULL, wrap_y * 2, 8) +
1794                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1795                                                        NULL, wrap_y * 2, 8);
1796                 if (progressive_score > interlaced_score) {
1797                     s->interlaced_dct = 1;
1798
                         /* field DCT: the lower blocks start one line
                          * below the upper ones and every block is read
                          * with doubled stride */
1799                     dct_offset = wrap_y;
1800                     wrap_y <<= 1;
1801                     if (s->chroma_format == CHROMA_422)
1802                         wrap_c <<= 1;
1803                 }
1804             }
1805         }
1806
             /* load the four luma blocks */
1807         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1808         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1809         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1810         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1811
1812         if (s->flags & CODEC_FLAG_GRAY) {
1813             skip_dct[4] = 1;
1814             skip_dct[5] = 1;
1815         } else {
1816             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1817             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1818             if (!s->chroma_y_shift) { /* 422 */
1819                 s->dsp.get_pixels(s->block[6],
1820                                   ptr_cb + (dct_offset >> 1), wrap_c);
1821                 s->dsp.get_pixels(s->block[7],
1822                                   ptr_cr + (dct_offset >> 1), wrap_c);
1823             }
1824         }
1825     } else {
             /* inter: build the prediction in s->dest[] and encode the
              * difference between source and prediction */
1826         op_pixels_func (*op_pix)[4];
1827         qpel_mc_func (*op_qpix)[16];
1828         uint8_t *dest_y, *dest_cb, *dest_cr;
1829
1830         dest_y  = s->dest[0];
1831         dest_cb = s->dest[1];
1832         dest_cr = s->dest[2];
1833
1834         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1835             op_pix  = s->hdsp.put_pixels_tab;
1836             op_qpix = s->dsp.put_qpel_pixels_tab;
1837         } else {
1838             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1839             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1840         }
1841
             /* after a forward prediction the operators are switched to the
              * averaging variants, so a following backward prediction is
              * averaged with what is already in dest */
1842         if (s->mv_dir & MV_DIR_FORWARD) {
1843             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1844                           s->last_picture.f.data,
1845                           op_pix, op_qpix);
1846             op_pix  = s->hdsp.avg_pixels_tab;
1847             op_qpix = s->dsp.avg_qpel_pixels_tab;
1848         }
1849         if (s->mv_dir & MV_DIR_BACKWARD) {
1850             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1851                           s->next_picture.f.data,
1852                           op_pix, op_qpix);
1853         }
1854
             /* frame/field DCT decision on the prediction error; same
              * scheme as in the intra path, with an additional -400 bias
              * when ildct_cmp is FF_CMP_VSSE */
1855         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1856             int progressive_score, interlaced_score;
1857
1858             s->interlaced_dct = 0;
1859             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1860                                                     ptr_y,              wrap_y,
1861                                                     8) +
1862                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1863                                                     ptr_y + wrap_y * 8, wrap_y,
1864                                                     8) - 400;
1865
1866             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1867                 progressive_score -= 400;
1868
1869             if (progressive_score > 0) {
1870                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1871                                                        ptr_y,
1872                                                        wrap_y * 2, 8) +
1873                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1874                                                        ptr_y + wrap_y,
1875                                                        wrap_y * 2, 8);
1876
1877                 if (progressive_score > interlaced_score) {
1878                     s->interlaced_dct = 1;
1879
1880                     dct_offset = wrap_y;
1881                     wrap_y <<= 1;
1882                     if (s->chroma_format == CHROMA_422)
1883                         wrap_c <<= 1;
1884                 }
1885             }
1886         }
1887
             /* residual = source - prediction, per block */
1888         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1889         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1890         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1891                            dest_y + dct_offset, wrap_y);
1892         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1893                            dest_y + dct_offset + 8, wrap_y);
1894
1895         if (s->flags & CODEC_FLAG_GRAY) {
1896             skip_dct[4] = 1;
1897             skip_dct[5] = 1;
1898         } else {
1899             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1900             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1901             if (!s->chroma_y_shift) { /* 422 */
1902                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1903                                    dest_cb + (dct_offset >> 1), wrap_c);
1904                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1905                                    dest_cr + (dct_offset >> 1), wrap_c);
1906             }
1907         }
1908         /* pre quantization */
             /* when the stored mc_mb_var of this macroblock is below
              * 2 * qscale^2, every block whose SAD against the prediction
              * is below 20 * qscale is marked as skipped */
1909         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1910                 2 * s->qscale * s->qscale) {
1911             // FIXME optimize
1912             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1913                               wrap_y, 8) < 20 * s->qscale)
1914                 skip_dct[0] = 1;
1915             if (s->dsp.sad[1](NULL, ptr_y + 8,
1916                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1917                 skip_dct[1] = 1;
1918             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1919                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1920                 skip_dct[2] = 1;
1921             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1922                               dest_y + dct_offset + 8,
1923                               wrap_y, 8) < 20 * s->qscale)
1924                 skip_dct[3] = 1;
1925             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1926                               wrap_c, 8) < 20 * s->qscale)
1927                 skip_dct[4] = 1;
1928             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1929                               wrap_c, 8) < 20 * s->qscale)
1930                 skip_dct[5] = 1;
1931             if (!s->chroma_y_shift) { /* 422 */
1932                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1933                                   dest_cb + (dct_offset >> 1),
1934                                   wrap_c, 8) < 20 * s->qscale)
1935                     skip_dct[6] = 1;
1936                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1937                                   dest_cr + (dct_offset >> 1),
1938                                   wrap_c, 8) < 20 * s->qscale)
1939                     skip_dct[7] = 1;
1940             }
1941         }
1942     }
1943
         /* noise shaping: compute per-sample weights from the source pixels
          * of every block that is not skipped and keep a copy of the block
          * data as it is before DCT and quantization; both are handed to
          * dct_quantize_refine() below */
1944     if (s->quantizer_noise_shaping) {
1945         if (!skip_dct[0])
1946             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1947         if (!skip_dct[1])
1948             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1949         if (!skip_dct[2])
1950             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1951         if (!skip_dct[3])
1952             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1953         if (!skip_dct[4])
1954             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1955         if (!skip_dct[5])
1956             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1957         if (!s->chroma_y_shift) { /* 422 */
1958             if (!skip_dct[6])
1959                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1960                                   wrap_c);
1961             if (!skip_dct[7])
1962                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1963                                   wrap_c);
1964         }
1965         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
1966     }
1967
1968     /* DCT & quantize */
1969     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1970     {
1971         for (i = 0; i < mb_block_count; i++) {
1972             if (!skip_dct[i]) {
1973                 int overflow;
1974                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1975                 // FIXME we could decide to change to quantizer instead of
1976                 // clipping
1977                 // JS: I don't think that would be a good idea it could lower
1978                 //     quality instead of improve it. Just INTRADC clipping
1979                 //     deserves changes in quantizer
1980                 if (overflow)
1981                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
1982             } else
                     /* skipped block: marked as having no coefficients */
1983                 s->block_last_index[i] = -1;
1984         }
1985         if (s->quantizer_noise_shaping) {
1986             for (i = 0; i < mb_block_count; i++) {
1987                 if (!skip_dct[i]) {
1988                     s->block_last_index[i] =
1989                         dct_quantize_refine(s, s->block[i], weight[i],
1990                                             orig[i], i, s->qscale);
1991                 }
1992             }
1993         }
1994
             /* single coefficient elimination, inter macroblocks only:
              * blocks 0..3 use the luma threshold, the rest the chroma one */
1995         if (s->luma_elim_threshold && !s->mb_intra)
1996             for (i = 0; i < 4; i++)
1997                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
1998         if (s->chroma_elim_threshold && !s->mb_intra)
1999             for (i = 4; i < mb_block_count; i++)
2000                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2001
             /* with CBP_RD, blocks without coefficients get a fixed large
              * coded_score */
2002         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2003             for (i = 0; i < mb_block_count; i++) {
2004                 if (s->block_last_index[i] == -1)
2005                     s->coded_score[i] = INT_MAX / 256;
2006             }
2007         }
2008     }
2009
         /* gray-only intra macroblock: blocks 4 and 5 are reduced to a
          * single DC coefficient of 1024 / c_dc_scale (rounded) */
2010     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2011         s->block_last_index[4] =
2012         s->block_last_index[5] = 0;
2013         s->block[4][0] =
2014         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2015     }
2016
2017     // non c quantize code returns incorrect block_last_index FIXME
         /* recompute the last index by scanning backwards for the last
          * non-zero coefficient in scan order */
2018     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2019         for (i = 0; i < mb_block_count; i++) {
2020             int j;
2021             if (s->block_last_index[i] > 0) {
2022                 for (j = 63; j > 0; j--) {
2023                     if (s->block[i][s->intra_scantable.permutated[j]])
2024                         break;
2025                 }
2026                 s->block_last_index[i] = j;
2027             }
2028         }
2029     }
2030
2031     /* huffman encode */
         /* each call is guarded by its CONFIG_*_ENCODER constant */
2032     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2033     case AV_CODEC_ID_MPEG1VIDEO:
2034     case AV_CODEC_ID_MPEG2VIDEO:
2035         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2036             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2037         break;
2038     case AV_CODEC_ID_MPEG4:
2039         if (CONFIG_MPEG4_ENCODER)
2040             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2041         break;
2042     case AV_CODEC_ID_MSMPEG4V2:
2043     case AV_CODEC_ID_MSMPEG4V3:
2044     case AV_CODEC_ID_WMV1:
2045         if (CONFIG_MSMPEG4_ENCODER)
2046             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2047         break;
2048     case AV_CODEC_ID_WMV2:
2049         if (CONFIG_WMV2_ENCODER)
2050             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2051         break;
2052     case AV_CODEC_ID_H261:
2053         if (CONFIG_H261_ENCODER)
2054             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2055         break;
2056     case AV_CODEC_ID_H263:
2057     case AV_CODEC_ID_H263P:
2058     case AV_CODEC_ID_FLV1:
2059     case AV_CODEC_ID_RV10:
2060     case AV_CODEC_ID_RV20:
2061         if (CONFIG_H263_ENCODER)
2062             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2063         break;
2064     case AV_CODEC_ID_MJPEG:
2065         if (CONFIG_MJPEG_ENCODER)
2066             ff_mjpeg_encode_mb(s, s->block);
2067         break;
2068     default:
2069         assert(0);
2070     }
2071 }
2072
2073 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2074 {
2075     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2076     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2077 }
2078
2079 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2080     int i;
2081
2082     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2083
2084     /* mpeg1 */
2085     d->mb_skip_run= s->mb_skip_run;
2086     for(i=0; i<3; i++)
2087         d->last_dc[i] = s->last_dc[i];
2088
2089     /* statistics */
2090     d->mv_bits= s->mv_bits;
2091     d->i_tex_bits= s->i_tex_bits;
2092     d->p_tex_bits= s->p_tex_bits;
2093     d->i_count= s->i_count;
2094     d->f_count= s->f_count;
2095     d->b_count= s->b_count;
2096     d->skip_count= s->skip_count;
2097     d->misc_bits= s->misc_bits;
2098     d->last_bits= 0;
2099
2100     d->mb_skipped= 0;
2101     d->qscale= s->qscale;
2102     d->dquant= s->dquant;
2103
2104     d->esc3_level_length= s->esc3_level_length;
2105 }
2106
2107 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2108     int i;
2109
2110     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2111     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2112
2113     /* mpeg1 */
2114     d->mb_skip_run= s->mb_skip_run;
2115     for(i=0; i<3; i++)
2116         d->last_dc[i] = s->last_dc[i];
2117
2118     /* statistics */
2119     d->mv_bits= s->mv_bits;
2120     d->i_tex_bits= s->i_tex_bits;
2121     d->p_tex_bits= s->p_tex_bits;
2122     d->i_count= s->i_count;
2123     d->f_count= s->f_count;
2124     d->b_count= s->b_count;
2125     d->skip_count= s->skip_count;
2126     d->misc_bits= s->misc_bits;
2127
2128     d->mb_intra= s->mb_intra;
2129     d->mb_skipped= s->mb_skipped;
2130     d->mv_type= s->mv_type;
2131     d->mv_dir= s->mv_dir;
2132     d->pb= s->pb;
2133     if(s->data_partitioning){
2134         d->pb2= s->pb2;
2135         d->tex_pb= s->tex_pb;
2136     }
2137     d->block= s->block;
2138     for(i=0; i<8; i++)
2139         d->block_last_index[i]= s->block_last_index[i];
2140     d->interlaced_dct= s->interlaced_dct;
2141     d->qscale= s->qscale;
2142
2143     d->esc3_level_length= s->esc3_level_length;
2144 }
2145
2146 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2147                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2148                            int *dmin, int *next_block, int motion_x, int motion_y)
2149 {
2150     int score;
2151     uint8_t *dest_backup[3];
2152
2153     copy_context_before_encode(s, backup, type);
2154
2155     s->block= s->blocks[*next_block];
2156     s->pb= pb[*next_block];
2157     if(s->data_partitioning){
2158         s->pb2   = pb2   [*next_block];
2159         s->tex_pb= tex_pb[*next_block];
2160     }
2161
2162     if(*next_block){
2163         memcpy(dest_backup, s->dest, sizeof(s->dest));
2164         s->dest[0] = s->rd_scratchpad;
2165         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2166         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2167         assert(s->linesize >= 32); //FIXME
2168     }
2169
2170     encode_mb(s, motion_x, motion_y);
2171
2172     score= put_bits_count(&s->pb);
2173     if(s->data_partitioning){
2174         score+= put_bits_count(&s->pb2);
2175         score+= put_bits_count(&s->tex_pb);
2176     }
2177
2178     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2179         ff_MPV_decode_mb(s, s->block);
2180
2181         score *= s->lambda2;
2182         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2183     }
2184
2185     if(*next_block){
2186         memcpy(s->dest, dest_backup, sizeof(s->dest));
2187     }
2188
2189     if(score<*dmin){
2190         *dmin= score;
2191         *next_block^=1;
2192
2193         copy_context_after_encode(best, s, type);
2194     }
2195 }
2196
2197 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2198     uint32_t *sq = ff_squareTbl + 256;
2199     int acc=0;
2200     int x,y;
2201
2202     if(w==16 && h==16)
2203         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2204     else if(w==8 && h==8)
2205         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2206
2207     for(y=0; y<h; y++){
2208         for(x=0; x<w; x++){
2209             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2210         }
2211     }
2212
2213     assert(acc>=0);
2214
2215     return acc;
2216 }
2217
2218 static int sse_mb(MpegEncContext *s){
2219     int w= 16;
2220     int h= 16;
2221
2222     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2223     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2224
2225     if(w==16 && h==16)
2226       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2227         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2228                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2229                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2230       }else{
2231         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2232                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2233                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2234       }
2235     else
2236         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2237                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2238                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2239 }
2240
2241 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2242     MpegEncContext *s= *(void**)arg;
2243
2244
2245     s->me.pre_pass=1;
2246     s->me.dia_size= s->avctx->pre_dia_size;
2247     s->first_slice_line=1;
2248     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2249         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2250             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2251         }
2252         s->first_slice_line=0;
2253     }
2254
2255     s->me.pre_pass=0;
2256
2257     return 0;
2258 }
2259
2260 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2261     MpegEncContext *s= *(void**)arg;
2262
2263     ff_check_alignment();
2264
2265     s->me.dia_size= s->avctx->dia_size;
2266     s->first_slice_line=1;
2267     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2268         s->mb_x=0; //for block init below
2269         ff_init_block_index(s);
2270         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2271             s->block_index[0]+=2;
2272             s->block_index[1]+=2;
2273             s->block_index[2]+=2;
2274             s->block_index[3]+=2;
2275
2276             /* compute motion vector & mb_type and store in context */
2277             if(s->pict_type==AV_PICTURE_TYPE_B)
2278                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2279             else
2280                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2281         }
2282         s->first_slice_line=0;
2283     }
2284     return 0;
2285 }
2286
2287 static int mb_var_thread(AVCodecContext *c, void *arg){
2288     MpegEncContext *s= *(void**)arg;
2289     int mb_x, mb_y;
2290
2291     ff_check_alignment();
2292
2293     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2294         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2295             int xx = mb_x * 16;
2296             int yy = mb_y * 16;
2297             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2298             int varc;
2299             int sum = s->dsp.pix_sum(pix, s->linesize);
2300
2301             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2302
2303             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2304             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2305             s->me.mb_var_sum_temp    += varc;
2306         }
2307     }
2308     return 0;
2309 }
2310
2311 static void write_slice_end(MpegEncContext *s){
2312     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2313         if(s->partitioned_frame){
2314             ff_mpeg4_merge_partitions(s);
2315         }
2316
2317         ff_mpeg4_stuffing(&s->pb);
2318     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2319         ff_mjpeg_encode_stuffing(&s->pb);
2320     }
2321
2322     avpriv_align_put_bits(&s->pb);
2323     flush_put_bits(&s->pb);
2324
2325     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2326         s->misc_bits+= get_bits_diff(s);
2327 }
2328
2329 static void write_mb_info(MpegEncContext *s)
2330 {
2331     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2332     int offset = put_bits_count(&s->pb);
2333     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2334     int gobn = s->mb_y / s->gob_index;
2335     int pred_x, pred_y;
2336     if (CONFIG_H263_ENCODER)
2337         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2338     bytestream_put_le32(&ptr, offset);
2339     bytestream_put_byte(&ptr, s->qscale);
2340     bytestream_put_byte(&ptr, gobn);
2341     bytestream_put_le16(&ptr, mba);
2342     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2343     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2344     /* 4MV not implemented */
2345     bytestream_put_byte(&ptr, 0); /* hmv2 */
2346     bytestream_put_byte(&ptr, 0); /* vmv2 */
2347 }
2348
2349 static void update_mb_info(MpegEncContext *s, int startcode)
2350 {
2351     if (!s->mb_info)
2352         return;
2353     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2354         s->mb_info_size += 12;
2355         s->prev_mb_info = s->last_mb_info;
2356     }
2357     if (startcode) {
2358         s->prev_mb_info = put_bits_count(&s->pb)/8;
2359         /* This might have incremented mb_info_size above, and we return without
2360          * actually writing any info into that slot yet. But in that case,
2361          * this will be called again at the start of the after writing the
2362          * start code, actually writing the mb info. */
2363         return;
2364     }
2365
2366     s->last_mb_info = put_bits_count(&s->pb)/8;
2367     if (!s->mb_info_size)
2368         s->mb_info_size += 12;
2369     write_mb_info(s);
2370 }
2371
2372 static int encode_thread(AVCodecContext *c, void *arg){
2373     MpegEncContext *s= *(void**)arg;
2374     int mb_x, mb_y, pdif = 0;
2375     int chr_h= 16>>s->chroma_y_shift;
2376     int i, j;
2377     MpegEncContext best_s, backup_s;
2378     uint8_t bit_buf[2][MAX_MB_BYTES];
2379     uint8_t bit_buf2[2][MAX_MB_BYTES];
2380     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2381     PutBitContext pb[2], pb2[2], tex_pb[2];
2382
2383     ff_check_alignment();
2384
2385     for(i=0; i<2; i++){
2386         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2387         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2388         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2389     }
2390
2391     s->last_bits= put_bits_count(&s->pb);
2392     s->mv_bits=0;
2393     s->misc_bits=0;
2394     s->i_tex_bits=0;
2395     s->p_tex_bits=0;
2396     s->i_count=0;
2397     s->f_count=0;
2398     s->b_count=0;
2399     s->skip_count=0;
2400
2401     for(i=0; i<3; i++){
2402         /* init last dc values */
2403         /* note: quant matrix value (8) is implied here */
2404         s->last_dc[i] = 128 << s->intra_dc_precision;
2405
2406         s->current_picture.f.error[i] = 0;
2407     }
2408     s->mb_skip_run = 0;
2409     memset(s->last_mv, 0, sizeof(s->last_mv));
2410
2411     s->last_mv_dir = 0;
2412
2413     switch(s->codec_id){
2414     case AV_CODEC_ID_H263:
2415     case AV_CODEC_ID_H263P:
2416     case AV_CODEC_ID_FLV1:
2417         if (CONFIG_H263_ENCODER)
2418             s->gob_index = ff_h263_get_gob_height(s);
2419         break;
2420     case AV_CODEC_ID_MPEG4:
2421         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2422             ff_mpeg4_init_partitions(s);
2423         break;
2424     }
2425
2426     s->resync_mb_x=0;
2427     s->resync_mb_y=0;
2428     s->first_slice_line = 1;
2429     s->ptr_lastgob = s->pb.buf;
2430     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2431         s->mb_x=0;
2432         s->mb_y= mb_y;
2433
2434         ff_set_qscale(s, s->qscale);
2435         ff_init_block_index(s);
2436
2437         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2438             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2439             int mb_type= s->mb_type[xy];
2440 //            int d;
2441             int dmin= INT_MAX;
2442             int dir;
2443
2444             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2445                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2446                 return -1;
2447             }
2448             if(s->data_partitioning){
2449                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2450                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2451                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2452                     return -1;
2453                 }
2454             }
2455
2456             s->mb_x = mb_x;
2457             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2458             ff_update_block_index(s);
2459
2460             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2461                 ff_h261_reorder_mb_index(s);
2462                 xy= s->mb_y*s->mb_stride + s->mb_x;
2463                 mb_type= s->mb_type[xy];
2464             }
2465
2466             /* write gob / video packet header  */
2467             if(s->rtp_mode){
2468                 int current_packet_size, is_gob_start;
2469
2470                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2471
2472                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2473
2474                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2475
2476                 switch(s->codec_id){
2477                 case AV_CODEC_ID_H263:
2478                 case AV_CODEC_ID_H263P:
2479                     if(!s->h263_slice_structured)
2480                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2481                     break;
2482                 case AV_CODEC_ID_MPEG2VIDEO:
2483                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2484                 case AV_CODEC_ID_MPEG1VIDEO:
2485                     if(s->mb_skip_run) is_gob_start=0;
2486                     break;
2487                 }
2488
2489                 if(is_gob_start){
2490                     if(s->start_mb_y != mb_y || mb_x!=0){
2491                         write_slice_end(s);
2492
2493                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2494                             ff_mpeg4_init_partitions(s);
2495                         }
2496                     }
2497
2498                     assert((put_bits_count(&s->pb)&7) == 0);
2499                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2500
2501                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2502                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2503                         int d = 100 / s->error_rate;
2504                         if(r % d == 0){
2505                             current_packet_size=0;
2506                             s->pb.buf_ptr= s->ptr_lastgob;
2507                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2508                         }
2509                     }
2510
2511                     if (s->avctx->rtp_callback){
2512                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2513                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2514                     }
2515                     update_mb_info(s, 1);
2516
2517                     switch(s->codec_id){
2518                     case AV_CODEC_ID_MPEG4:
2519                         if (CONFIG_MPEG4_ENCODER) {
2520                             ff_mpeg4_encode_video_packet_header(s);
2521                             ff_mpeg4_clean_buffers(s);
2522                         }
2523                     break;
2524                     case AV_CODEC_ID_MPEG1VIDEO:
2525                     case AV_CODEC_ID_MPEG2VIDEO:
2526                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2527                             ff_mpeg1_encode_slice_header(s);
2528                             ff_mpeg1_clean_buffers(s);
2529                         }
2530                     break;
2531                     case AV_CODEC_ID_H263:
2532                     case AV_CODEC_ID_H263P:
2533                         if (CONFIG_H263_ENCODER)
2534                             ff_h263_encode_gob_header(s, mb_y);
2535                     break;
2536                     }
2537
2538                     if(s->flags&CODEC_FLAG_PASS1){
2539                         int bits= put_bits_count(&s->pb);
2540                         s->misc_bits+= bits - s->last_bits;
2541                         s->last_bits= bits;
2542                     }
2543
2544                     s->ptr_lastgob += current_packet_size;
2545                     s->first_slice_line=1;
2546                     s->resync_mb_x=mb_x;
2547                     s->resync_mb_y=mb_y;
2548                 }
2549             }
2550
2551             if(  (s->resync_mb_x   == s->mb_x)
2552                && s->resync_mb_y+1 == s->mb_y){
2553                 s->first_slice_line=0;
2554             }
2555
2556             s->mb_skipped=0;
2557             s->dquant=0; //only for QP_RD
2558
2559             update_mb_info(s, 0);
2560
2561             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2562                 int next_block=0;
2563                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2564
2565                 copy_context_before_encode(&backup_s, s, -1);
2566                 backup_s.pb= s->pb;
2567                 best_s.data_partitioning= s->data_partitioning;
2568                 best_s.partitioned_frame= s->partitioned_frame;
2569                 if(s->data_partitioning){
2570                     backup_s.pb2= s->pb2;
2571                     backup_s.tex_pb= s->tex_pb;
2572                 }
2573
2574                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2575                     s->mv_dir = MV_DIR_FORWARD;
2576                     s->mv_type = MV_TYPE_16X16;
2577                     s->mb_intra= 0;
2578                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2579                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2580                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2581                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2582                 }
2583                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2584                     s->mv_dir = MV_DIR_FORWARD;
2585                     s->mv_type = MV_TYPE_FIELD;
2586                     s->mb_intra= 0;
2587                     for(i=0; i<2; i++){
2588                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2589                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2590                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2591                     }
2592                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2593                                  &dmin, &next_block, 0, 0);
2594                 }
2595                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2596                     s->mv_dir = MV_DIR_FORWARD;
2597                     s->mv_type = MV_TYPE_16X16;
2598                     s->mb_intra= 0;
2599                     s->mv[0][0][0] = 0;
2600                     s->mv[0][0][1] = 0;
2601                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2602                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2603                 }
2604                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2605                     s->mv_dir = MV_DIR_FORWARD;
2606                     s->mv_type = MV_TYPE_8X8;
2607                     s->mb_intra= 0;
2608                     for(i=0; i<4; i++){
2609                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2610                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2611                     }
2612                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2613                                  &dmin, &next_block, 0, 0);
2614                 }
2615                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2616                     s->mv_dir = MV_DIR_FORWARD;
2617                     s->mv_type = MV_TYPE_16X16;
2618                     s->mb_intra= 0;
2619                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2620                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2621                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2622                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2623                 }
2624                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2625                     s->mv_dir = MV_DIR_BACKWARD;
2626                     s->mv_type = MV_TYPE_16X16;
2627                     s->mb_intra= 0;
2628                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2629                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2630                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2631                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2632                 }
2633                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2634                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2635                     s->mv_type = MV_TYPE_16X16;
2636                     s->mb_intra= 0;
2637                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2638                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2639                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2640                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2641                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2642                                  &dmin, &next_block, 0, 0);
2643                 }
2644                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2645                     s->mv_dir = MV_DIR_FORWARD;
2646                     s->mv_type = MV_TYPE_FIELD;
2647                     s->mb_intra= 0;
2648                     for(i=0; i<2; i++){
2649                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2650                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2651                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2652                     }
2653                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2654                                  &dmin, &next_block, 0, 0);
2655                 }
2656                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2657                     s->mv_dir = MV_DIR_BACKWARD;
2658                     s->mv_type = MV_TYPE_FIELD;
2659                     s->mb_intra= 0;
2660                     for(i=0; i<2; i++){
2661                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2662                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2663                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2664                     }
2665                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2666                                  &dmin, &next_block, 0, 0);
2667                 }
2668                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2669                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2670                     s->mv_type = MV_TYPE_FIELD;
2671                     s->mb_intra= 0;
2672                     for(dir=0; dir<2; dir++){
2673                         for(i=0; i<2; i++){
2674                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2675                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2676                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2677                         }
2678                     }
2679                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2680                                  &dmin, &next_block, 0, 0);
2681                 }
2682                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2683                     s->mv_dir = 0;
2684                     s->mv_type = MV_TYPE_16X16;
2685                     s->mb_intra= 1;
2686                     s->mv[0][0][0] = 0;
2687                     s->mv[0][0][1] = 0;
2688                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2689                                  &dmin, &next_block, 0, 0);
2690                     if(s->h263_pred || s->h263_aic){
2691                         if(best_s.mb_intra)
2692                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2693                         else
2694                             ff_clean_intra_table_entries(s); //old mode?
2695                     }
2696                 }
2697
2698                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2699                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2700                         const int last_qp= backup_s.qscale;
2701                         int qpi, qp, dc[6];
2702                         int16_t ac[6][16];
2703                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2704                         static const int dquant_tab[4]={-1,1,-2,2};
2705
2706                         assert(backup_s.dquant == 0);
2707
2708                         //FIXME intra
2709                         s->mv_dir= best_s.mv_dir;
2710                         s->mv_type = MV_TYPE_16X16;
2711                         s->mb_intra= best_s.mb_intra;
2712                         s->mv[0][0][0] = best_s.mv[0][0][0];
2713                         s->mv[0][0][1] = best_s.mv[0][0][1];
2714                         s->mv[1][0][0] = best_s.mv[1][0][0];
2715                         s->mv[1][0][1] = best_s.mv[1][0][1];
2716
2717                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2718                         for(; qpi<4; qpi++){
2719                             int dquant= dquant_tab[qpi];
2720                             qp= last_qp + dquant;
2721                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2722                                 continue;
2723                             backup_s.dquant= dquant;
2724                             if(s->mb_intra && s->dc_val[0]){
2725                                 for(i=0; i<6; i++){
2726                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2727                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2728                                 }
2729                             }
2730
2731                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2732                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2733                             if(best_s.qscale != qp){
2734                                 if(s->mb_intra && s->dc_val[0]){
2735                                     for(i=0; i<6; i++){
2736                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2737                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2738                                     }
2739                                 }
2740                             }
2741                         }
2742                     }
2743                 }
2744                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2745                     int mx= s->b_direct_mv_table[xy][0];
2746                     int my= s->b_direct_mv_table[xy][1];
2747
2748                     backup_s.dquant = 0;
2749                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2750                     s->mb_intra= 0;
2751                     ff_mpeg4_set_direct_mv(s, mx, my);
2752                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2753                                  &dmin, &next_block, mx, my);
2754                 }
2755                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2756                     backup_s.dquant = 0;
2757                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2758                     s->mb_intra= 0;
2759                     ff_mpeg4_set_direct_mv(s, 0, 0);
2760                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2761                                  &dmin, &next_block, 0, 0);
2762                 }
2763                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2764                     int coded=0;
2765                     for(i=0; i<6; i++)
2766                         coded |= s->block_last_index[i];
2767                     if(coded){
2768                         int mx,my;
2769                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2770                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2771                             mx=my=0; //FIXME find the one we actually used
2772                             ff_mpeg4_set_direct_mv(s, mx, my);
2773                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2774                             mx= s->mv[1][0][0];
2775                             my= s->mv[1][0][1];
2776                         }else{
2777                             mx= s->mv[0][0][0];
2778                             my= s->mv[0][0][1];
2779                         }
2780
2781                         s->mv_dir= best_s.mv_dir;
2782                         s->mv_type = best_s.mv_type;
2783                         s->mb_intra= 0;
2784 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2785                         s->mv[0][0][1] = best_s.mv[0][0][1];
2786                         s->mv[1][0][0] = best_s.mv[1][0][0];
2787                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2788                         backup_s.dquant= 0;
2789                         s->skipdct=1;
2790                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2791                                         &dmin, &next_block, mx, my);
2792                         s->skipdct=0;
2793                     }
2794                 }
2795
2796                 s->current_picture.qscale_table[xy] = best_s.qscale;
2797
2798                 copy_context_after_encode(s, &best_s, -1);
2799
2800                 pb_bits_count= put_bits_count(&s->pb);
2801                 flush_put_bits(&s->pb);
2802                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2803                 s->pb= backup_s.pb;
2804
2805                 if(s->data_partitioning){
2806                     pb2_bits_count= put_bits_count(&s->pb2);
2807                     flush_put_bits(&s->pb2);
2808                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2809                     s->pb2= backup_s.pb2;
2810
2811                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2812                     flush_put_bits(&s->tex_pb);
2813                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2814                     s->tex_pb= backup_s.tex_pb;
2815                 }
2816                 s->last_bits= put_bits_count(&s->pb);
2817
2818                 if (CONFIG_H263_ENCODER &&
2819                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2820                     ff_h263_update_motion_val(s);
2821
2822                 if(next_block==0){ //FIXME 16 vs linesize16
2823                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2824                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2825                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2826                 }
2827
2828                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2829                     ff_MPV_decode_mb(s, s->block);
2830             } else {
2831                 int motion_x = 0, motion_y = 0;
2832                 s->mv_type=MV_TYPE_16X16;
2833                 // only one MB-Type possible
2834
2835                 switch(mb_type){
2836                 case CANDIDATE_MB_TYPE_INTRA:
2837                     s->mv_dir = 0;
2838                     s->mb_intra= 1;
2839                     motion_x= s->mv[0][0][0] = 0;
2840                     motion_y= s->mv[0][0][1] = 0;
2841                     break;
2842                 case CANDIDATE_MB_TYPE_INTER:
2843                     s->mv_dir = MV_DIR_FORWARD;
2844                     s->mb_intra= 0;
2845                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2846                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2847                     break;
2848                 case CANDIDATE_MB_TYPE_INTER_I:
2849                     s->mv_dir = MV_DIR_FORWARD;
2850                     s->mv_type = MV_TYPE_FIELD;
2851                     s->mb_intra= 0;
2852                     for(i=0; i<2; i++){
2853                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2854                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2855                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2856                     }
2857                     break;
2858                 case CANDIDATE_MB_TYPE_INTER4V:
2859                     s->mv_dir = MV_DIR_FORWARD;
2860                     s->mv_type = MV_TYPE_8X8;
2861                     s->mb_intra= 0;
2862                     for(i=0; i<4; i++){
2863                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2864                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2865                     }
2866                     break;
2867                 case CANDIDATE_MB_TYPE_DIRECT:
2868                     if (CONFIG_MPEG4_ENCODER) {
2869                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2870                         s->mb_intra= 0;
2871                         motion_x=s->b_direct_mv_table[xy][0];
2872                         motion_y=s->b_direct_mv_table[xy][1];
2873                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2874                     }
2875                     break;
2876                 case CANDIDATE_MB_TYPE_DIRECT0:
2877                     if (CONFIG_MPEG4_ENCODER) {
2878                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2879                         s->mb_intra= 0;
2880                         ff_mpeg4_set_direct_mv(s, 0, 0);
2881                     }
2882                     break;
2883                 case CANDIDATE_MB_TYPE_BIDIR:
2884                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2885                     s->mb_intra= 0;
2886                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2887                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2888                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2889                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2890                     break;
2891                 case CANDIDATE_MB_TYPE_BACKWARD:
2892                     s->mv_dir = MV_DIR_BACKWARD;
2893                     s->mb_intra= 0;
2894                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2895                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2896                     break;
2897                 case CANDIDATE_MB_TYPE_FORWARD:
2898                     s->mv_dir = MV_DIR_FORWARD;
2899                     s->mb_intra= 0;
2900                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2901                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2902                     break;
2903                 case CANDIDATE_MB_TYPE_FORWARD_I:
2904                     s->mv_dir = MV_DIR_FORWARD;
2905                     s->mv_type = MV_TYPE_FIELD;
2906                     s->mb_intra= 0;
2907                     for(i=0; i<2; i++){
2908                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2909                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2910                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2911                     }
2912                     break;
2913                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2914                     s->mv_dir = MV_DIR_BACKWARD;
2915                     s->mv_type = MV_TYPE_FIELD;
2916                     s->mb_intra= 0;
2917                     for(i=0; i<2; i++){
2918                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2919                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2920                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2921                     }
2922                     break;
2923                 case CANDIDATE_MB_TYPE_BIDIR_I:
2924                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2925                     s->mv_type = MV_TYPE_FIELD;
2926                     s->mb_intra= 0;
2927                     for(dir=0; dir<2; dir++){
2928                         for(i=0; i<2; i++){
2929                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2930                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2931                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2932                         }
2933                     }
2934                     break;
2935                 default:
2936                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2937                 }
2938
2939                 encode_mb(s, motion_x, motion_y);
2940
2941                 // RAL: Update last macroblock type
2942                 s->last_mv_dir = s->mv_dir;
2943
2944                 if (CONFIG_H263_ENCODER &&
2945                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2946                     ff_h263_update_motion_val(s);
2947
2948                 ff_MPV_decode_mb(s, s->block);
2949             }
2950
2951             /* clean the MV table in IPS frames for direct mode in B frames */
2952             if(s->mb_intra /* && I,P,S_TYPE */){
2953                 s->p_mv_table[xy][0]=0;
2954                 s->p_mv_table[xy][1]=0;
2955             }
2956
2957             if(s->flags&CODEC_FLAG_PSNR){
2958                 int w= 16;
2959                 int h= 16;
2960
2961                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2962                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2963
2964                 s->current_picture.f.error[0] += sse(
2965                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2966                     s->dest[0], w, h, s->linesize);
2967                 s->current_picture.f.error[1] += sse(
2968                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2969                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2970                 s->current_picture.f.error[2] += sse(
2971                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2972                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2973             }
2974             if(s->loop_filter){
2975                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2976                     ff_h263_loop_filter(s);
2977             }
2978             av_dlog(s->avctx, "MB %d %d bits\n",
2979                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
2980         }
2981     }
2982
2983     //not beautiful here but we must write it before flushing so it has to be here
2984     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
2985         ff_msmpeg4_encode_ext_header(s);
2986
2987     write_slice_end(s);
2988
2989     /* Send the last GOB if RTP */
2990     if (s->avctx->rtp_callback) {
2991         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
2992         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
2993         /* Call the RTP callback to send the last GOB */
2994         emms_c();
2995         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
2996     }
2997
2998     return 0;
2999 }
3000
3001 #define MERGE(field) dst->field += src->field; src->field=0
3002 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3003     MERGE(me.scene_change_score);
3004     MERGE(me.mc_mb_var_sum_temp);
3005     MERGE(me.mb_var_sum_temp);
3006 }
3007
3008 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3009     int i;
3010
3011     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3012     MERGE(dct_count[1]);
3013     MERGE(mv_bits);
3014     MERGE(i_tex_bits);
3015     MERGE(p_tex_bits);
3016     MERGE(i_count);
3017     MERGE(f_count);
3018     MERGE(b_count);
3019     MERGE(skip_count);
3020     MERGE(misc_bits);
3021     MERGE(er.error_count);
3022     MERGE(padding_bug_score);
3023     MERGE(current_picture.f.error[0]);
3024     MERGE(current_picture.f.error[1]);
3025     MERGE(current_picture.f.error[2]);
3026
3027     if(dst->avctx->noise_reduction){
3028         for(i=0; i<64; i++){
3029             MERGE(dct_error_sum[0][i]);
3030             MERGE(dct_error_sum[1][i]);
3031         }
3032     }
3033
3034     assert(put_bits_count(&src->pb) % 8 ==0);
3035     assert(put_bits_count(&dst->pb) % 8 ==0);
3036     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3037     flush_put_bits(&dst->pb);
3038 }
3039
3040 static int estimate_qp(MpegEncContext *s, int dry_run){
3041     if (s->next_lambda){
3042         s->current_picture_ptr->f.quality =
3043         s->current_picture.f.quality = s->next_lambda;
3044         if(!dry_run) s->next_lambda= 0;
3045     } else if (!s->fixed_qscale) {
3046         s->current_picture_ptr->f.quality =
3047         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3048         if (s->current_picture.f.quality < 0)
3049             return -1;
3050     }
3051
3052     if(s->adaptive_quant){
3053         switch(s->codec_id){
3054         case AV_CODEC_ID_MPEG4:
3055             if (CONFIG_MPEG4_ENCODER)
3056                 ff_clean_mpeg4_qscales(s);
3057             break;
3058         case AV_CODEC_ID_H263:
3059         case AV_CODEC_ID_H263P:
3060         case AV_CODEC_ID_FLV1:
3061             if (CONFIG_H263_ENCODER)
3062                 ff_clean_h263_qscales(s);
3063             break;
3064         default:
3065             ff_init_qscale_tab(s);
3066         }
3067
3068         s->lambda= s->lambda_table[0];
3069         //FIXME broken
3070     }else
3071         s->lambda = s->current_picture.f.quality;
3072     update_qscale(s);
3073     return 0;
3074 }
3075
3076 /* must be called before writing the header */
3077 static void set_frame_distances(MpegEncContext * s){
3078     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3079     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3080
3081     if(s->pict_type==AV_PICTURE_TYPE_B){
3082         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3083         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3084     }else{
3085         s->pp_time= s->time - s->last_non_b_time;
3086         s->last_non_b_time= s->time;
3087         assert(s->picture_number==0 || s->pp_time > 0);
3088     }
3089 }
3090
/**
 * Encode the current picture into s->pb.
 *
 * Steps, in order: motion estimation (for I-pictures instead the variance
 * pass, and only when qscale is not fixed), promotion of a P-picture to I
 * when the scene change score exceeds the threshold, f_code/b_code selection
 * with fixing of over-long motion vectors, the final quantizer estimate,
 * writing of the picture header matching s->out_format, and the
 * encode_thread pass. Work is dispatched through s->avctx->execute() over
 * s->slice_context_count contexts; the results of contexts 1..count-1 are
 * merged back into s.
 *
 * @param s              encoder context
 * @param picture_number stored in s->picture_number and passed on to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails, or the
 *         negative value returned by ff_update_duplicate_context()
 */
3091 static int encode_picture(MpegEncContext *s, int picture_number)
3092 {
3093     int i, ret;
3094     int bits;
3095     int context_count = s->slice_context_count;
3096
3097     s->picture_number = picture_number;
3098
3099     /* Reset the average MB variance */
3100     s->me.mb_var_sum_temp    =
3101     s->me.mc_mb_var_sum_temp = 0;
3102
3103     /* we need to initialize some time vars before we can encode b-frames */
3104     // RAL: Condition added for MPEG1VIDEO
3105     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3106         set_frame_distances(s);
3107     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3108         ff_set_mpeg4_time(s);
3109
3110     s->me.scene_change_score=0;
3111
3112 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3113
         /* I-pictures reset no_rounding (1 for msmpeg4_version >= 3, else 0);
          * other non-B pictures toggle it when flipflop_rounding is set or the
          * codec is H263P or MPEG4; B-pictures leave it unchanged */
3114     if(s->pict_type==AV_PICTURE_TYPE_I){
3115         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3116         else                        s->no_rounding=0;
3117     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3118         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3119             s->no_rounding ^= 1;
3120     }
3121
         /* lambda used during motion estimation: a dry-run quantizer estimate
          * in pass 2, otherwise (unless CODEC_FLAG_QSCALE is set) the last
          * lambda recorded for this kind of picture */
3122     if(s->flags & CODEC_FLAG_PASS2){
3123         if (estimate_qp(s,1) < 0)
3124             return -1;
3125         ff_get_2pass_fcode(s);
3126     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3127         if(s->pict_type==AV_PICTURE_TYPE_B)
3128             s->lambda= s->last_lambda_for[s->pict_type];
3129         else
3130             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3131         update_qscale(s);
3132     }
3133
3134     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* bring thread contexts 1..context_count-1 up to date with s */
3135     for(i=1; i<context_count; i++){
3136         ret = ff_update_duplicate_context(s->thread_context[i], s);
3137         if (ret < 0)
3138             return ret;
3139     }
3140
3141     if(ff_init_me(s)<0)
3142         return -1;
3143
3144     /* Estimate motion for every MB */
3145     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3146         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3147         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3148         if (s->pict_type != AV_PICTURE_TYPE_B) {
                 /* pre-pass: run when pre_me is set and the last non-B picture
                  * was an I-picture, or unconditionally when pre_me == 2 */
3149             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3150                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3151             }
3152         }
3153
3154         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3155     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3156         /* I-Frame */
3157         for(i=0; i<s->mb_stride*s->mb_height; i++)
3158             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3159
3160         if(!s->fixed_qscale){
3161             /* finding spatial complexity for I-frame rate control */
3162             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3163         }
3164     }
         /* fold the per-context ME statistics into s and publish the sums */
3165     for(i=1; i<context_count; i++){
3166         merge_context_after_me(s, s->thread_context[i]);
3167     }
3168     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3169     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3170     emms_c();
3171
         /* scene change: re-type the P-picture as I and mark every macroblock
          * as an intra candidate */
3172     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3173         s->pict_type= AV_PICTURE_TYPE_I;
3174         for(i=0; i<s->mb_stride*s->mb_height; i++)
3175             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3176         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3177                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3178     }
3179
         /* f_code/b_code selection and fixing of long motion vectors; skipped
          * entirely when umvplus is set */
3180     if(!s->umvplus){
3181         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3182             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3183
3184             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3185                 int a,b;
3186                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3187                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3188                 s->f_code= FFMAX3(s->f_code, a, b);
3189             }
3190
3191             ff_fix_long_p_mvs(s);
3192             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3193             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3194                 int j;
3195                 for(i=0; i<2; i++){
3196                     for(j=0; j<2; j++)
3197                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3198                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3199                 }
3200             }
3201         }
3202
3203         if(s->pict_type==AV_PICTURE_TYPE_B){
3204             int a, b;
3205
                 /* f_code covers the forward tables, b_code the backward ones */
3206             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3207             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3208             s->f_code = FFMAX(a, b);
3209
3210             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3211             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3212             s->b_code = FFMAX(a, b);
3213
3214             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3215             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3216             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3217             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3218             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3219                 int dir, j;
3220                 for(dir=0; dir<2; dir++){
3221                     for(i=0; i<2; i++){
3222                         for(j=0; j<2; j++){
3223                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3224                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3225                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3226                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3227                         }
3228                     }
3229                 }
3230             }
3231         }
3232     }
3233
         /* final (non dry-run) quantizer decision for this picture */
3234     if (estimate_qp(s, 0) < 0)
3235         return -1;
3236
3237     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3238         s->qscale= 3; //reduce clipping problems
3239
3240     if (s->out_format == FMT_MJPEG) {
3241         /* for mjpeg, we do include qscale in the matrix */
3242         for(i=1;i<64;i++){
3243             int j= s->dsp.idct_permutation[i];
3244
3245             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3246         }
3247         s->y_dc_scale_table=
3248         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3249         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3250         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3251                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into intra_matrix, so it is pinned to 8 */
3252         s->qscale= 8;
3253     }
3254
3255     //FIXME var duplication
3256     s->current_picture_ptr->f.key_frame =
3257     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3258     s->current_picture_ptr->f.pict_type =
3259     s->current_picture.f.pict_type = s->pict_type;
3260
3261     if (s->current_picture.f.key_frame)
3262         s->picture_in_gop_number=0;
3263
         /* write the picture header; last_bits marks the position before it */
3264     s->last_bits= put_bits_count(&s->pb);
3265     switch(s->out_format) {
3266     case FMT_MJPEG:
3267         if (CONFIG_MJPEG_ENCODER)
3268             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3269                                            s->intra_matrix);
3270         break;
3271     case FMT_H261:
3272         if (CONFIG_H261_ENCODER)
3273             ff_h261_encode_picture_header(s, picture_number);
3274         break;
3275     case FMT_H263:
3276         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3277             ff_wmv2_encode_picture_header(s, picture_number);
3278         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3279             ff_msmpeg4_encode_picture_header(s, picture_number);
3280         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3281             ff_mpeg4_encode_picture_header(s, picture_number);
3282         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3283             ff_rv10_encode_picture_header(s, picture_number);
3284         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3285             ff_rv20_encode_picture_header(s, picture_number);
3286         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3287             ff_flv_encode_picture_header(s, picture_number);
3288         else if (CONFIG_H263_ENCODER)
3289             ff_h263_encode_picture_header(s, picture_number);
3290         break;
3291     case FMT_MPEG1:
3292         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3293             ff_mpeg1_encode_picture_header(s, picture_number);
3294         break;
3295     default:
3296         assert(0);
3297     }
         /* header_bits = number of bits the header writer just produced */
3298     bits= put_bits_count(&s->pb);
3299     s->header_bits= bits - s->last_bits;
3300
         /* pass the post-ME state to the other contexts, encode, then merge
          * their statistics and bitstreams back into s */
3301     for(i=1; i<context_count; i++){
3302         update_duplicate_context_after_me(s->thread_context[i], s);
3303     }
3304     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3305     for(i=1; i<context_count; i++){
3306         merge_context_after_encode(s, s->thread_context[i]);
3307     }
3308     emms_c();
3309     return 0;
3310 }
3311
3312 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3313     const int intra= s->mb_intra;
3314     int i;
3315
3316     s->dct_count[intra]++;
3317
3318     for(i=0; i<64; i++){
3319         int level= block[i];
3320
3321         if(level){
3322             if(level>0){
3323                 s->dct_error_sum[intra][i] += level;
3324                 level -= s->dct_offset[intra][i];
3325                 if(level<0) level=0;
3326             }else{
3327                 s->dct_error_sum[intra][i] -= level;
3328                 level += s->dct_offset[intra][i];
3329                 if(level>0) level=0;
3330             }
3331             block[i]= level;
3332         }
3333     }
3334 }
3335
/**
 * Forward-DCT and quantize one block, choosing the levels with a
 * rate-distortion (trellis) search over run/level codes.
 *
 * For each scan position up to the last coefficient that quantizes to a
 * non-zero value, at most two candidate levels are tried (the rounded level
 * and the one whose magnitude is one smaller). Each candidate is scored as
 * distortion + lambda * code length on top of the best score of a surviving
 * earlier position. The cheapest chain is written back into block using the
 * permuted scantable; all other AC coefficients are cleared.
 *
 * @param s        encoder context
 * @param block    64 coefficients; transformed in place by s->dsp.fdct and
 *                 replaced by the chosen quantized levels
 * @param n        block index; for intra blocks n < 4 selects y_dc_scale,
 *                 otherwise c_dc_scale; also indexes s->coded_score
 * @param qscale   quantizer scale, used to index q_intra_matrix/q_inter_matrix
 * @param overflow set to 1 when the OR of the candidate magnitudes exceeds
 *                 s->max_qcoeff (an overflow might have happened), else 0
 * @return scan index of the last coded coefficient; -1 is returned for an
 *         inter block in which nothing is coded
 */
3336 static int dct_quantize_trellis_c(MpegEncContext *s,
3337                                   int16_t *block, int n,
3338                                   int qscale, int *overflow){
3339     const int *qmat;
3340     const uint8_t *scantable= s->intra_scantable.scantable;
3341     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3342     int max=0;
3343     unsigned int threshold1, threshold2;
3344     int bias=0;
3345     int run_tab[65];
3346     int level_tab[65];
3347     int score_tab[65];
3348     int survivor[65];
3349     int survivor_count;
3350     int last_run=0;
3351     int last_level=0;
3352     int last_score= 0;
3353     int last_i;
3354     int coeff[2][64];
3355     int coeff_count[64];
3356     int qmul, qadd, start_i, last_non_zero, i, dc;
3357     const int esc_length= s->ac_esc_length;
3358     uint8_t * length;
3359     uint8_t * last_length;
3360     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3361
3362     s->dsp.fdct (block);
3363
3364     if(s->dct_error_sum)
3365         s->denoise_dct(s, block);
         /* dequantization multiplier and offset for the FMT_H263 path below;
          * these are 8x the qmul/qadd used by dct_quantize_refine() */
3366     qmul= qscale*16;
3367     qadd= ((qscale-1)|1)*8;
3368
3369     if (s->mb_intra) {
3370         int q;
3371         if (!s->h263_aic) {
3372             if (n < 4)
3373                 q = s->y_dc_scale;
3374             else
3375                 q = s->c_dc_scale;
3376             q = q << 3;
3377         } else{
3378             /* For AIC we skip quant/dequant of INTRADC */
3379             q = 1 << 3;
3380             qadd=0;
3381         }
3382
3383         /* note: block[0] is assumed to be positive */
3384         block[0] = (block[0] + (q >> 1)) / q;
             /* the DC coefficient is done; the search covers AC only */
3385         start_i = 1;
3386         last_non_zero = 0;
3387         qmat = s->q_intra_matrix[qscale];
3388         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3389             bias= 1<<(QMAT_SHIFT-1);
3390         length     = s->intra_ac_vlc_length;
3391         last_length= s->intra_ac_vlc_last_length;
3392     } else {
3393         start_i = 0;
3394         last_non_zero = -1;
3395         qmat = s->q_inter_matrix[qscale];
3396         length     = s->inter_ac_vlc_length;
3397         last_length= s->inter_ac_vlc_last_length;
3398     }
3399     last_i= start_i;
3400
         /* a scaled coefficient within [-threshold1, threshold1] is treated as
          * quantizing to zero; the unsigned compare against threshold2 tests
          * both signs at once */
3401     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3402     threshold2= (threshold1<<1);
3403
         /* scan backwards for the last coefficient that does not quantize to zero */
3404     for(i=63; i>=start_i; i--) {
3405         const int j = scantable[i];
3406         int level = block[j] * qmat[j];
3407
3408         if(((unsigned)(level+threshold1))>threshold2){
3409             last_non_zero = i;
3410             break;
3411         }
3412     }
3413
         /* build the candidate levels per position: coeff[0] is the rounded
          * level, coeff[1] the one with magnitude one smaller (only counted
          * when the rounded magnitude is at least 2); coefficients below the
          * threshold get a single candidate of +1 or -1 by sign */
3414     for(i=start_i; i<=last_non_zero; i++) {
3415         const int j = scantable[i];
3416         int level = block[j] * qmat[j];
3417
3418 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3419 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3420         if(((unsigned)(level+threshold1))>threshold2){
3421             if(level>0){
3422                 level= (bias + level)>>QMAT_SHIFT;
3423                 coeff[0][i]= level;
3424                 coeff[1][i]= level-1;
3425 //                coeff[2][k]= level-2;
3426             }else{
3427                 level= (bias - level)>>QMAT_SHIFT;
3428                 coeff[0][i]= -level;
3429                 coeff[1][i]= -level+1;
3430 //                coeff[2][k]= -level+2;
3431             }
3432             coeff_count[i]= FFMIN(level, 2);
3433             assert(coeff_count[i]);
3434             max |=level;
3435         }else{
3436             coeff[0][i]= (level>>31)|1;
3437             coeff_count[i]= 1;
3438         }
3439     }
3440
3441     *overflow= s->max_qcoeff < max; //overflow might have happened
3442
         /* nothing quantizes to non-zero: clear the coefficients from start_i on */
3443     if(last_non_zero < start_i){
3444         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3445         return last_non_zero;
3446     }
3447
         /* score_tab[k] is the best score of a chain whose last coded
          * coefficient sits at scan position k-1 (score_tab[start_i] is the
          * empty chain); survivor[] holds the k values still used as
          * predecessors */
3448     score_tab[start_i]= 0;
3449     survivor[0]= start_i;
3450     survivor_count= 1;
3451
3452     for(i=start_i; i<=last_non_zero; i++){
3453         int level_index, j, zero_distortion;
3454         int dct_coeff= FFABS(block[ scantable[i] ]);
3455         int best_score=256*256*256*120;
3456
             /* NOTE(review): presumably undoes the AAN scale factors left in
              * the output of ff_fdct_ifast - confirm */
3457         if (s->dsp.fdct == ff_fdct_ifast)
3458             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3459         zero_distortion= dct_coeff*dct_coeff;
3460
3461         for(level_index=0; level_index < coeff_count[i]; level_index++){
3462             int distortion;
3463             int level= coeff[level_index][i];
3464             const int alevel= FFABS(level);
3465             int unquant_coeff;
3466
3467             assert(level);
3468
                 /* reconstruct the value this level would dequantize to */
3469             if(s->out_format == FMT_H263){
3470                 unquant_coeff= alevel*qmul + qadd;
3471             }else{ //MPEG1
3472                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3473                 if(s->mb_intra){
3474                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3475                         unquant_coeff =   (unquant_coeff - 1) | 1;
3476                 }else{
3477                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3478                         unquant_coeff =   (unquant_coeff - 1) | 1;
3479                 }
3480                 unquant_coeff<<= 3;
3481             }
3482
                 /* distortion is measured relative to coding this coefficient as zero */
3483             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* levels -64..63 (0..127 after the +64) are looked up in the
                  * length tables; anything else is charged esc_length */
3484             level+=64;
3485             if((level&(~127)) == 0){
3486                 for(j=survivor_count-1; j>=0; j--){
3487                     int run= i - survivor[j];
3488                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3489                     score += score_tab[i-run];
3490
3491                     if(score < best_score){
3492                         best_score= score;
3493                         run_tab[i+1]= run;
3494                         level_tab[i+1]= level-64;
3495                     }
3496                 }
3497
                     /* FMT_H263 only: also score this coefficient as the final
                      * one of the block, using the separate last_length table */
3498                 if(s->out_format == FMT_H263){
3499                     for(j=survivor_count-1; j>=0; j--){
3500                         int run= i - survivor[j];
3501                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3502                         score += score_tab[i-run];
3503                         if(score < last_score){
3504                             last_score= score;
3505                             last_run= run;
3506                             last_level= level-64;
3507                             last_i= i+1;
3508                         }
3509                     }
3510                 }
3511             }else{
3512                 distortion += esc_length*lambda;
3513                 for(j=survivor_count-1; j>=0; j--){
3514                     int run= i - survivor[j];
3515                     int score= distortion + score_tab[i-run];
3516
3517                     if(score < best_score){
3518                         best_score= score;
3519                         run_tab[i+1]= run;
3520                         level_tab[i+1]= level-64;
3521                     }
3522                 }
3523
3524                 if(s->out_format == FMT_H263){
3525                   for(j=survivor_count-1; j>=0; j--){
3526                         int run= i - survivor[j];
3527                         int score= distortion + score_tab[i-run];
3528                         if(score < last_score){
3529                             last_score= score;
3530                             last_run= run;
3531                             last_level= level-64;
3532                             last_i= i+1;
3533                         }
3534                     }
3535                 }
3536             }
3537         }
3538
3539         score_tab[i+1]= best_score;
3540
             /* drop trailing survivors whose score is worse than the new best
              * (with a slack of lambda when last_non_zero > 27) */
3541         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3542         if(last_non_zero <= 27){
3543             for(; survivor_count; survivor_count--){
3544                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3545                     break;
3546             }
3547         }else{
3548             for(; survivor_count; survivor_count--){
3549                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3550                     break;
3551             }
3552         }
3553
3554         survivor[ survivor_count++ ]= i+1;
3555     }
3556
         /* formats other than FMT_H263: pick the end position afterwards;
          * NOTE(review): the 2*lambda added for i != 0 is presumably the cost
          * of the end-of-block code - confirm */
3557     if(s->out_format != FMT_H263){
3558         last_score= 256*256*256*120;
3559         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3560             int score= score_tab[i];
3561             if(i) score += lambda*2; //FIXME exacter?
3562
3563             if(score < last_score){
3564                 last_score= score;
3565                 last_i= i;
3566                 last_level= level_tab[i];
3567                 last_run= run_tab[i];
3568             }
3569         }
3570     }
3571
3572     s->coded_score[n] = last_score;
3573
         /* remember |block[0]| before the coefficients are cleared */
3574     dc= FFABS(block[0]);
3575     last_non_zero= last_i - 1;
3576     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3577
3578     if(last_non_zero < start_i)
3579         return last_non_zero;
3580
         /* inter block in which only the first coefficient is left: decide
          * its level again, with coding nothing at all (score dc*dc) as the
          * baseline */
3581     if(last_non_zero == 0 && start_i == 0){
3582         int best_level= 0;
3583         int best_score= dc * dc;
3584
3585         for(i=0; i<coeff_count[0]; i++){
3586             int level= coeff[i][0];
3587             int alevel= FFABS(level);
3588             int unquant_coeff, score, distortion;
3589
3590             if(s->out_format == FMT_H263){
3591                     unquant_coeff= (alevel*qmul + qadd)>>3;
3592             }else{ //MPEG1
3593                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3594                     unquant_coeff =   (unquant_coeff - 1) | 1;
3595             }
3596             unquant_coeff = (unquant_coeff + 4) >> 3;
3597             unquant_coeff<<= 3 + 3;
3598
3599             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3600             level+=64;
3601             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3602             else                    score= distortion + esc_length*lambda;
3603
3604             if(score < best_score){
3605                 best_score= score;
3606                 best_level= level - 64;
3607             }
3608         }
3609         block[0]= best_level;
3610         s->coded_score[n] = best_score - dc*dc;
3611         if(best_level == 0) return -1;
3612         else                return last_non_zero;
3613     }
3614
         /* write the chosen chain back: the final coefficient first, then
          * walk backwards through run_tab/level_tab */
3615     i= last_i;
3616     assert(last_level);
3617
3618     block[ perm_scantable[last_non_zero] ]= last_level;
3619     i -= last_run + 1;
3620
3621     for(; i>start_i; i -= run_tab[i] + 1){
3622         block[ perm_scantable[i-1] ]= level_tab[i];
3623     }
3624
3625     return last_non_zero;
3626 }
3627
3628 //#define REFINE_STATS 1
3629 static int16_t basis[64][64];
3630
3631 static void build_basis(uint8_t *perm){
3632     int i, j, x, y;
3633     emms_c();
3634     for(i=0; i<8; i++){
3635         for(j=0; j<8; j++){
3636             for(y=0; y<8; y++){
3637                 for(x=0; x<8; x++){
3638                     double s= 0.25*(1<<BASIS_SHIFT);
3639                     int index= 8*i + j;
3640                     int perm_index= perm[index];
3641                     if(i==0) s*= sqrt(0.5);
3642                     if(j==0) s*= sqrt(0.5);
3643                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3644                 }
3645             }
3646         }
3647     }
3648 }
3649
3650 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3651                         int16_t *block, int16_t *weight, int16_t *orig,
3652                         int n, int qscale){
3653     int16_t rem[64];
3654     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3655     const uint8_t *scantable= s->intra_scantable.scantable;
3656     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3657 //    unsigned int threshold1, threshold2;
3658 //    int bias=0;
3659     int run_tab[65];
3660     int prev_run=0;
3661     int prev_level=0;
3662     int qmul, qadd, start_i, last_non_zero, i, dc;
3663     uint8_t * length;
3664     uint8_t * last_length;
3665     int lambda;
3666     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3667 #ifdef REFINE_STATS
3668 static int count=0;
3669 static int after_last=0;
3670 static int to_zero=0;
3671 static int from_zero=0;
3672 static int raise=0;
3673 static int lower=0;
3674 static int messed_sign=0;
3675 #endif
3676
3677     if(basis[0][0] == 0)
3678         build_basis(s->dsp.idct_permutation);
3679
3680     qmul= qscale*2;
3681     qadd= (qscale-1)|1;
3682     if (s->mb_intra) {
3683         if (!s->h263_aic) {
3684             if (n < 4)
3685                 q = s->y_dc_scale;
3686             else
3687                 q = s->c_dc_scale;
3688         } else{
3689             /* For AIC we skip quant/dequant of INTRADC */
3690             q = 1;
3691             qadd=0;
3692         }
3693         q <<= RECON_SHIFT-3;
3694         /* note: block[0] is assumed to be positive */
3695         dc= block[0]*q;
3696 //        block[0] = (block[0] + (q >> 1)) / q;
3697         start_i = 1;
3698 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3699 //            bias= 1<<(QMAT_SHIFT-1);
3700         length     = s->intra_ac_vlc_length;
3701         last_length= s->intra_ac_vlc_last_length;
3702     } else {
3703         dc= 0;
3704         start_i = 0;
3705         length     = s->inter_ac_vlc_length;
3706         last_length= s->inter_ac_vlc_last_length;
3707     }
3708     last_non_zero = s->block_last_index[n];
3709
3710 #ifdef REFINE_STATS
3711 {START_TIMER
3712 #endif
3713     dc += (1<<(RECON_SHIFT-1));
3714     for(i=0; i<64; i++){
3715         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3716     }
3717 #ifdef REFINE_STATS
3718 STOP_TIMER("memset rem[]")}
3719 #endif
3720     sum=0;
3721     for(i=0; i<64; i++){
3722         int one= 36;
3723         int qns=4;
3724         int w;
3725
3726         w= FFABS(weight[i]) + qns*one;
3727         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3728
3729         weight[i] = w;
3730 //        w=weight[i] = (63*qns + (w/2)) / w;
3731
3732         assert(w>0);
3733         assert(w<(1<<6));
3734         sum += w*w;
3735     }
3736     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3737 #ifdef REFINE_STATS
3738 {START_TIMER
3739 #endif
3740     run=0;
3741     rle_index=0;
3742     for(i=start_i; i<=last_non_zero; i++){
3743         int j= perm_scantable[i];
3744         const int level= block[j];
3745         int coeff;
3746
3747         if(level){
3748             if(level<0) coeff= qmul*level - qadd;
3749             else        coeff= qmul*level + qadd;
3750             run_tab[rle_index++]=run;
3751             run=0;
3752
3753             s->dsp.add_8x8basis(rem, basis[j], coeff);
3754         }else{
3755             run++;
3756         }
3757     }
3758 #ifdef REFINE_STATS
3759 if(last_non_zero>0){
3760 STOP_TIMER("init rem[]")
3761 }
3762 }
3763
3764 {START_TIMER
3765 #endif
3766     for(;;){
3767         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3768         int best_coeff=0;
3769         int best_change=0;
3770         int run2, best_unquant_change=0, analyze_gradient;
3771 #ifdef REFINE_STATS
3772 {START_TIMER
3773 #endif
3774         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3775
3776         if(analyze_gradient){
3777 #ifdef REFINE_STATS
3778 {START_TIMER
3779 #endif
3780             for(i=0; i<64; i++){
3781                 int w= weight[i];
3782
3783                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3784             }
3785 #ifdef REFINE_STATS
3786 STOP_TIMER("rem*w*w")}
3787 {START_TIMER
3788 #endif
3789             s->dsp.fdct(d1);
3790 #ifdef REFINE_STATS
3791 STOP_TIMER("dct")}
3792 #endif
3793         }
3794
3795         if(start_i){
3796             const int level= block[0];
3797             int change, old_coeff;
3798
3799             assert(s->mb_intra);
3800
3801             old_coeff= q*level;
3802
3803             for(change=-1; change<=1; change+=2){
3804                 int new_level= level + change;
3805                 int score, new_coeff;
3806
3807                 new_coeff= q*new_level;
3808                 if(new_coeff >= 2048 || new_coeff < 0)
3809                     continue;
3810
3811                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3812                 if(score<best_score){
3813                     best_score= score;
3814                     best_coeff= 0;
3815                     best_change= change;
3816                     best_unquant_change= new_coeff - old_coeff;
3817                 }
3818             }
3819         }
3820
3821         run=0;
3822         rle_index=0;
3823         run2= run_tab[rle_index++];
3824         prev_level=0;
3825         prev_run=0;
3826
3827         for(i=start_i; i<64; i++){
3828             int j= perm_scantable[i];
3829             const int level= block[j];
3830             int change, old_coeff;
3831
3832             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3833                 break;
3834
3835             if(level){
3836                 if(level<0) old_coeff= qmul*level - qadd;
3837                 else        old_coeff= qmul*level + qadd;
3838                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3839             }else{
3840                 old_coeff=0;
3841                 run2--;
3842                 assert(run2>=0 || i >= last_non_zero );
3843             }
3844
3845             for(change=-1; change<=1; change+=2){
3846                 int new_level= level + change;
3847                 int score, new_coeff, unquant_change;
3848
3849                 score=0;
3850                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3851                    continue;
3852
3853                 if(new_level){
3854                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3855                     else            new_coeff= qmul*new_level + qadd;
3856                     if(new_coeff >= 2048 || new_coeff <= -2048)
3857                         continue;
3858                     //FIXME check for overflow
3859
3860                     if(level){
3861                         if(level < 63 && level > -63){
3862                             if(i < last_non_zero)
3863                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3864                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3865                             else
3866                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3867                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3868                         }
3869                     }else{
3870                         assert(FFABS(new_level)==1);
3871
3872                         if(analyze_gradient){
3873                             int g= d1[ scantable[i] ];
3874                             if(g && (g^new_level) >= 0)
3875                                 continue;
3876                         }
3877
3878                         if(i < last_non_zero){
3879                             int next_i= i + run2 + 1;
3880                             int next_level= block[ perm_scantable[next_i] ] + 64;
3881
3882                             if(next_level&(~127))
3883                                 next_level= 0;
3884
3885                             if(next_i < last_non_zero)
3886                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3887                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3888                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3889                             else
3890                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3891                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3892                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3893                         }else{
3894                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3895                             if(prev_level){
3896                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3897                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3898                             }
3899                         }
3900                     }
3901                 }else{
3902                     new_coeff=0;
3903                     assert(FFABS(level)==1);
3904
3905                     if(i < last_non_zero){
3906                         int next_i= i + run2 + 1;
3907                         int next_level= block[ perm_scantable[next_i] ] + 64;
3908
3909                         if(next_level&(~127))
3910                             next_level= 0;
3911
3912                         if(next_i < last_non_zero)
3913                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3914                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3915                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3916                         else
3917                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3918                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3919                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3920                     }else{
3921                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3922                         if(prev_level){
3923                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3924                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3925                         }
3926                     }
3927                 }
3928
3929                 score *= lambda;
3930
3931                 unquant_change= new_coeff - old_coeff;
3932                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3933
3934                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3935                 if(score<best_score){
3936                     best_score= score;
3937                     best_coeff= i;
3938                     best_change= change;
3939                     best_unquant_change= unquant_change;
3940                 }
3941             }
3942             if(level){
3943                 prev_level= level + 64;
3944                 if(prev_level&(~127))
3945                     prev_level= 0;
3946                 prev_run= run;
3947                 run=0;
3948             }else{
3949                 run++;
3950             }
3951         }
3952 #ifdef REFINE_STATS
3953 STOP_TIMER("iterative step")}
3954 #endif
3955
3956         if(best_change){
3957             int j= perm_scantable[ best_coeff ];
3958
3959             block[j] += best_change;
3960
3961             if(best_coeff > last_non_zero){
3962                 last_non_zero= best_coeff;
3963                 assert(block[j]);
3964 #ifdef REFINE_STATS
3965 after_last++;
3966 #endif
3967             }else{
3968 #ifdef REFINE_STATS
3969 if(block[j]){
3970     if(block[j] - best_change){
3971         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3972             raise++;
3973         }else{
3974             lower++;
3975         }
3976     }else{
3977         from_zero++;
3978     }
3979 }else{
3980     to_zero++;
3981 }
3982 #endif
3983                 for(; last_non_zero>=start_i; last_non_zero--){
3984                     if(block[perm_scantable[last_non_zero]])
3985                         break;
3986                 }
3987             }
3988 #ifdef REFINE_STATS
3989 count++;
3990 if(256*256*256*64 % count == 0){
3991     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
3992 }
3993 #endif
3994             run=0;
3995             rle_index=0;
3996             for(i=start_i; i<=last_non_zero; i++){
3997                 int j= perm_scantable[i];
3998                 const int level= block[j];
3999
4000                  if(level){
4001                      run_tab[rle_index++]=run;
4002                      run=0;
4003                  }else{
4004                      run++;
4005                  }
4006             }
4007
4008             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4009         }else{
4010             break;
4011         }
4012     }
4013 #ifdef REFINE_STATS
4014 if(last_non_zero>0){
4015 STOP_TIMER("iterative search")
4016 }
4017 }
4018 #endif
4019
4020     return last_non_zero;
4021 }
4022
4023 int ff_dct_quantize_c(MpegEncContext *s,
4024                         int16_t *block, int n,
4025                         int qscale, int *overflow)
4026 {
4027     int i, j, level, last_non_zero, q, start_i;
4028     const int *qmat;
4029     const uint8_t *scantable= s->intra_scantable.scantable;
4030     int bias;
4031     int max=0;
4032     unsigned int threshold1, threshold2;
4033
4034     s->dsp.fdct (block);
4035
4036     if(s->dct_error_sum)
4037         s->denoise_dct(s, block);
4038
4039     if (s->mb_intra) {
4040         if (!s->h263_aic) {
4041             if (n < 4)
4042                 q = s->y_dc_scale;
4043             else
4044                 q = s->c_dc_scale;
4045             q = q << 3;
4046         } else
4047             /* For AIC we skip quant/dequant of INTRADC */
4048             q = 1 << 3;
4049
4050         /* note: block[0] is assumed to be positive */
4051         block[0] = (block[0] + (q >> 1)) / q;
4052         start_i = 1;
4053         last_non_zero = 0;
4054         qmat = s->q_intra_matrix[qscale];
4055         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4056     } else {
4057         start_i = 0;
4058         last_non_zero = -1;
4059         qmat = s->q_inter_matrix[qscale];
4060         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4061     }
4062     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4063     threshold2= (threshold1<<1);
4064     for(i=63;i>=start_i;i--) {
4065         j = scantable[i];
4066         level = block[j] * qmat[j];
4067
4068         if(((unsigned)(level+threshold1))>threshold2){
4069             last_non_zero = i;
4070             break;
4071         }else{
4072             block[j]=0;
4073         }
4074     }
4075     for(i=start_i; i<=last_non_zero; i++) {
4076         j = scantable[i];
4077         level = block[j] * qmat[j];
4078
4079 //        if(   bias+level >= (1<<QMAT_SHIFT)
4080 //           || bias-level >= (1<<QMAT_SHIFT)){
4081         if(((unsigned)(level+threshold1))>threshold2){
4082             if(level>0){
4083                 level= (bias + level)>>QMAT_SHIFT;
4084                 block[j]= level;
4085             }else{
4086                 level= (bias - level)>>QMAT_SHIFT;
4087                 block[j]= -level;
4088             }
4089             max |=level;
4090         }else{
4091             block[j]=0;
4092         }
4093     }
4094     *overflow= s->max_qcoeff < max; //overflow might have happened
4095
4096     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4097     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4098         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4099
4100     return last_non_zero;
4101 }
4102
/* Byte offset of field x inside MpegEncContext, for AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags shared by every entry below: video + encoding parameter.
 * The expansion is parenthesized so that it stays a single operand when
 * combined with other operators (e.g. "VE & flag"). */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4105 static const AVOption h263_options[] = {
4106     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4107     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4108     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4109     FF_MPV_COMMON_OPTS
4110     { NULL },
4111 };
4112
4113 static const AVClass h263_class = {
4114     .class_name = "H.263 encoder",
4115     .item_name  = av_default_item_name,
4116     .option     = h263_options,
4117     .version    = LIBAVUTIL_VERSION_INT,
4118 };
4119
4120 AVCodec ff_h263_encoder = {
4121     .name           = "h263",
4122     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4123     .type           = AVMEDIA_TYPE_VIDEO,
4124     .id             = AV_CODEC_ID_H263,
4125     .priv_data_size = sizeof(MpegEncContext),
4126     .init           = ff_MPV_encode_init,
4127     .encode2        = ff_MPV_encode_picture,
4128     .close          = ff_MPV_encode_end,
4129     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4130     .priv_class     = &h263_class,
4131 };
4132
4133 static const AVOption h263p_options[] = {
4134     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4135     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4136     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4137     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4138     FF_MPV_COMMON_OPTS
4139     { NULL },
4140 };
4141 static const AVClass h263p_class = {
4142     .class_name = "H.263p encoder",
4143     .item_name  = av_default_item_name,
4144     .option     = h263p_options,
4145     .version    = LIBAVUTIL_VERSION_INT,
4146 };
4147
4148 AVCodec ff_h263p_encoder = {
4149     .name           = "h263p",
4150     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4151     .type           = AVMEDIA_TYPE_VIDEO,
4152     .id             = AV_CODEC_ID_H263P,
4153     .priv_data_size = sizeof(MpegEncContext),
4154     .init           = ff_MPV_encode_init,
4155     .encode2        = ff_MPV_encode_picture,
4156     .close          = ff_MPV_encode_end,
4157     .capabilities   = CODEC_CAP_SLICE_THREADS,
4158     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4159     .priv_class     = &h263p_class,
4160 };
4161
4162 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4163
4164 AVCodec ff_msmpeg4v2_encoder = {
4165     .name           = "msmpeg4v2",
4166     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4167     .type           = AVMEDIA_TYPE_VIDEO,
4168     .id             = AV_CODEC_ID_MSMPEG4V2,
4169     .priv_data_size = sizeof(MpegEncContext),
4170     .init           = ff_MPV_encode_init,
4171     .encode2        = ff_MPV_encode_picture,
4172     .close          = ff_MPV_encode_end,
4173     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4174     .priv_class     = &msmpeg4v2_class,
4175 };
4176
4177 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4178
4179 AVCodec ff_msmpeg4v3_encoder = {
4180     .name           = "msmpeg4",
4181     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4182     .type           = AVMEDIA_TYPE_VIDEO,
4183     .id             = AV_CODEC_ID_MSMPEG4V3,
4184     .priv_data_size = sizeof(MpegEncContext),
4185     .init           = ff_MPV_encode_init,
4186     .encode2        = ff_MPV_encode_picture,
4187     .close          = ff_MPV_encode_end,
4188     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4189     .priv_class     = &msmpeg4v3_class,
4190 };
4191
4192 FF_MPV_GENERIC_CLASS(wmv1)
4193
4194 AVCodec ff_wmv1_encoder = {
4195     .name           = "wmv1",
4196     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4197     .type           = AVMEDIA_TYPE_VIDEO,
4198     .id             = AV_CODEC_ID_WMV1,
4199     .priv_data_size = sizeof(MpegEncContext),
4200     .init           = ff_MPV_encode_init,
4201     .encode2        = ff_MPV_encode_picture,
4202     .close          = ff_MPV_encode_end,
4203     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4204     .priv_class     = &wmv1_class,
4205 };