git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "idctdsp.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "h261.h"
  44 #include "h263.h"
  45 #include "mjpegenc_common.h"
  46 #include "mathops.h"
  47 #include "mpegutils.h"
  48 #include "mjpegenc.h"
  49 #include "msmpeg4.h"
  50 #include "pixblockdsp.h"
  51 #include "qpeldsp.h"
  52 #include "faandct.h"
  53 #include "thread.h"
  54 #include "aandcttab.h"
  55 #include "flv.h"
  56 #include "mpeg4video.h"
  57 #include "internal.h"
  58 #include "bytestream.h"
  59 #include <limits.h>
  60 #include "sp5x.h"
  61
/* Forward declarations of file-local helpers that are referenced before
 * their definitions appear later in this file. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* Default tables installed by MPV_encode_defaults() as s->me.mv_penalty and
 * s->fcode_tab. Both are indexed with an offset of MAX_MV in their last
 * dimension (MAX_MV * 2 + 1 entries). */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

/* Option table consisting of the common mpegvideo options
 * (FF_MPV_COMMON_OPTS) followed by the NULL terminator. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
  75
  76 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
  77                        uint16_t (*qmat16)[2][64],
  78                        const uint16_t *quant_matrix,
  79                        int bias, int qmin, int qmax, int intra)
  80 {
  81     FDCTDSPContext *fdsp = &s->fdsp;
  82     int qscale;
  83     int shift = 0;
  84
  85     for (qscale = qmin; qscale <= qmax; qscale++) {
  86         int i;
  87         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
  88             fdsp->fdct == ff_jpeg_fdct_islow_10 ||
  89             fdsp->fdct == ff_faandct) {
  90             for (i = 0; i < 64; i++) {
  91                 const int j = s->idsp.idct_permutation[i];
  92                 /* 16 <= qscale * quant_matrix[i] <= 7905
  93                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
  94                  *             19952 <=              x  <= 249205026
  95                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
  96                  *           3444240 >= (1 << 36) / (x) >= 275 */
  97
  98                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
  99                                         (qscale * quant_matrix[j]));
 100             }
 101         } else if (fdsp->fdct == ff_fdct_ifast) {
 102             for (i = 0; i < 64; i++) {
 103                 const int j = s->idsp.idct_permutation[i];
 104                 /* 16 <= qscale * quant_matrix[i] <= 7905
 105                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 106                  *             19952 <=              x  <= 249205026
 107                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 108                  *           3444240 >= (1 << 36) / (x) >= 275 */
 109
 110                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
 111                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
 112             }
 113         } else {
 114             for (i = 0; i < 64; i++) {
 115                 const int j = s->idsp.idct_permutation[i];
 116                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 117                  * Assume x = qscale * quant_matrix[i]
 118                  * So             16 <=              x  <= 7905
 119                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 120                  * so          32768 >= (1 << 19) / (x) >= 67 */
 121                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
 122                                         (qscale * quant_matrix[j]));
 123                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 124                 //                    (qscale * quant_matrix[i]);
 125                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
 126                                        (qscale * quant_matrix[j]);
 127
 128                 if (qmat16[qscale][0][i] == 0 ||
 129                     qmat16[qscale][0][i] == 128 * 256)
 130                     qmat16[qscale][0][i] = 128 * 256 - 1;
 131                 qmat16[qscale][1][i] =
 132                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 133                                 qmat16[qscale][0][i]);
 134             }
 135         }
 136
 137         for (i = intra; i < 64; i++) {
 138             int64_t max = 8191;
 139             if (fdsp->fdct == ff_fdct_ifast) {
 140                 max = (8191LL * ff_aanscales[i]) >> 14;
 141             }
 142             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 143                 shift++;
 144             }
 145         }
 146     }
 147     if (shift) {
 148         av_log(NULL, AV_LOG_INFO,
 149                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 150                QMAT_SHIFT - shift);
 151     }
 152 }
 153
 154 static inline void update_qscale(MpegEncContext *s)
 155 {
 156     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 157                 (FF_LAMBDA_SHIFT + 7);
 158     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 159
 160     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 161                  FF_LAMBDA_SHIFT;
 162 }
 163
 164 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 165 {
 166     int i;
 167
 168     if (matrix) {
 169         put_bits(pb, 1, 1);
 170         for (i = 0; i < 64; i++) {
 171             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 172         }
 173     } else
 174         put_bits(pb, 1, 0);
 175 }
 176
 177 /**
 178  * init s->current_picture.qscale_table from s->lambda_table
 179  */
 180 void ff_init_qscale_tab(MpegEncContext *s)
 181 {
 182     int8_t * const qscale_table = s->current_picture.qscale_table;
 183     int i;
 184
 185     for (i = 0; i < s->mb_num; i++) {
 186         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 187         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 188         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 189                                                   s->avctx->qmax);
 190     }
 191 }
 192
 193 static void update_duplicate_context_after_me(MpegEncContext *dst,
 194                                               MpegEncContext *src)
 195 {
 196 #define COPY(a) dst->a= src->a
 197     COPY(pict_type);
 198     COPY(current_picture);
 199     COPY(f_code);
 200     COPY(b_code);
 201     COPY(qscale);
 202     COPY(lambda);
 203     COPY(lambda2);
 204     COPY(picture_in_gop_number);
 205     COPY(gop_picture_number);
 206     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 207     COPY(progressive_frame);    // FIXME don't set in encode_header
 208     COPY(partitioned_frame);    // FIXME don't set in encode_header
 209 #undef COPY
 210 }
 211
 212 /**
 213  * Set the given MpegEncContext to defaults for encoding.
 214  * the changed fields will not depend upon the prior state of the MpegEncContext.
 215  */
 216 static void MPV_encode_defaults(MpegEncContext *s)
 217 {
 218     int i;
 219     ff_MPV_common_defaults(s);
 220
 221     for (i = -16; i < 16; i++) {
 222         default_fcode_tab[i + MAX_MV] = 1;
 223     }
 224     s->me.mv_penalty = default_mv_penalty;
 225     s->fcode_tab     = default_fcode_tab;
 226
 227     s->input_picture_number  = 0;
 228     s->picture_in_gop_number = 0;
 229 }
 230
 231 av_cold int ff_dct_encode_init(MpegEncContext *s) {
 232     if (ARCH_X86)
 233         ff_dct_encode_init_x86(s);
 234
 235     if (CONFIG_H263_ENCODER)
 236         ff_h263dsp_init(&s->h263dsp);
 237     if (!s->dct_quantize)
 238         s->dct_quantize = ff_dct_quantize_c;
 239     if (!s->denoise_dct)
 240         s->denoise_dct  = denoise_dct_c;
 241     s->fast_dct_quantize = s->dct_quantize;
 242     if (s->avctx->trellis)
 243         s->dct_quantize  = dct_quantize_trellis_c;
 244
 245     return 0;
 246 }
 247
 248 /* init video encoder */
 249 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
 250 {
 251     MpegEncContext *s = avctx->priv_data;
 252     int i, ret, format_supported;
 253
 254     MPV_encode_defaults(s);
 255
 256     switch (avctx->codec_id) {
 257     case AV_CODEC_ID_MPEG2VIDEO:
 258         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 259             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 260             av_log(avctx, AV_LOG_ERROR,
 261                    "only YUV420 and YUV422 are supported\n");
 262             return -1;
 263         }
 264         break;
 265     case AV_CODEC_ID_MJPEG:
 266     case AV_CODEC_ID_AMV:
 267         format_supported = 0;
 268         /* JPEG color space */
 269         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
 270             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
 271             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
 272             (avctx->color_range == AVCOL_RANGE_JPEG &&
 273              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 274               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 275               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
 276             format_supported = 1;
 277         /* MPEG color space */
 278         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
 279                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 280                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 281                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
 282             format_supported = 1;
 283
 284         if (!format_supported) {
 285             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 286             return -1;
 287         }
 288         break;
 289     default:
 290         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 291             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 292             return -1;
 293         }
 294     }
 295
 296     switch (avctx->pix_fmt) {
 297     case AV_PIX_FMT_YUVJ444P:
 298     case AV_PIX_FMT_YUV444P:
 299         s->chroma_format = CHROMA_444;
 300         break;
 301     case AV_PIX_FMT_YUVJ422P:
 302     case AV_PIX_FMT_YUV422P:
 303         s->chroma_format = CHROMA_422;
 304         break;
 305     case AV_PIX_FMT_YUVJ420P:
 306     case AV_PIX_FMT_YUV420P:
 307     default:
 308         s->chroma_format = CHROMA_420;
 309         break;
 310     }
 311
 312     s->bit_rate = avctx->bit_rate;
 313     s->width    = avctx->width;
 314     s->height   = avctx->height;
 315     if (avctx->gop_size > 600 &&
 316         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 317         av_log(avctx, AV_LOG_WARNING,
 318                "keyframe interval too large!, reducing it from %d to %d\n",
 319                avctx->gop_size, 600);
 320         avctx->gop_size = 600;
 321     }
 322     s->gop_size     = avctx->gop_size;
 323     s->avctx        = avctx;
 324     s->flags        = avctx->flags;
 325     s->flags2       = avctx->flags2;
 326     if (avctx->max_b_frames > MAX_B_FRAMES) {
 327         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 328                "is %d.\n", MAX_B_FRAMES);
 329         avctx->max_b_frames = MAX_B_FRAMES;
 330     }
 331     s->max_b_frames = avctx->max_b_frames;
 332     s->codec_id     = avctx->codec->id;
 333     s->strict_std_compliance = avctx->strict_std_compliance;
 334     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 335     s->mpeg_quant         = avctx->mpeg_quant;
 336     s->rtp_mode           = !!avctx->rtp_payload_size;
 337     s->intra_dc_precision = avctx->intra_dc_precision;
 338
 339     // workaround some differences between how applications specify dc precission
 340     if (s->intra_dc_precision < 0) {
 341         s->intra_dc_precision += 8;
 342     } else if (s->intra_dc_precision >= 8)
 343         s->intra_dc_precision -= 8;
 344
 345     if (s->intra_dc_precision < 0) {
 346         av_log(avctx, AV_LOG_ERROR,
 347                 "intra dc precision must be positive, note some applications use"
 348                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
 349         return AVERROR(EINVAL);
 350     }
 351
 352     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
 353         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
 354         return AVERROR(EINVAL);
 355     }
 356     s->user_specified_pts = AV_NOPTS_VALUE;
 357
 358     if (s->gop_size <= 1) {
 359         s->intra_only = 1;
 360         s->gop_size   = 12;
 361     } else {
 362         s->intra_only = 0;
 363     }
 364
 365     s->me_method = avctx->me_method;
 366
 367     /* Fixed QSCALE */
 368     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 369
 370     s->adaptive_quant = (s->avctx->lumi_masking ||
 371                          s->avctx->dark_masking ||
 372                          s->avctx->temporal_cplx_masking ||
 373                          s->avctx->spatial_cplx_masking  ||
 374                          s->avctx->p_masking      ||
 375                          s->avctx->border_masking ||
 376                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 377                         !s->fixed_qscale;
 378
 379     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
 380
 381     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 382         switch(avctx->codec_id) {
 383         case AV_CODEC_ID_MPEG1VIDEO:
 384         case AV_CODEC_ID_MPEG2VIDEO:
 385             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
 386             break;
 387         case AV_CODEC_ID_MPEG4:
 388         case AV_CODEC_ID_MSMPEG4V1:
 389         case AV_CODEC_ID_MSMPEG4V2:
 390         case AV_CODEC_ID_MSMPEG4V3:
 391             if       (avctx->rc_max_rate >= 15000000) {
 392                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
 393             } else if(avctx->rc_max_rate >=  2000000) {
 394                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
 395             } else if(avctx->rc_max_rate >=   384000) {
 396                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
 397             } else
 398                 avctx->rc_buffer_size = 40;
 399             avctx->rc_buffer_size *= 16384;
 400             break;
 401         }
 402         if (avctx->rc_buffer_size) {
 403             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
 404         }
 405     }
 406
 407     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
 408         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
 409         return -1;
 410     }
 411
 412     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 413         av_log(avctx, AV_LOG_INFO,
 414                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 415     }
 416
 417     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 418         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 419         return -1;
 420     }
 421
 422     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 423         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
 424         return -1;
 425     }
 426
 427     if (avctx->rc_max_rate &&
 428         avctx->rc_max_rate == avctx->bit_rate &&
 429         avctx->rc_max_rate != avctx->rc_min_rate) {
 430         av_log(avctx, AV_LOG_INFO,
 431                "impossible bitrate constraints, this will fail\n");
 432     }
 433
 434     if (avctx->rc_buffer_size &&
 435         avctx->bit_rate * (int64_t)avctx->time_base.num >
 436             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 437         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 438         return -1;
 439     }
 440
 441     if (!s->fixed_qscale &&
 442         avctx->bit_rate * av_q2d(avctx->time_base) >
 443             avctx->bit_rate_tolerance) {
 444         av_log(avctx, AV_LOG_WARNING,
 445                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
 446         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
 447     }
 448
 449     if (s->avctx->rc_max_rate &&
 450         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 451         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 452          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 453         90000LL * (avctx->rc_buffer_size - 1) >
 454             s->avctx->rc_max_rate * 0xFFFFLL) {
 455         av_log(avctx, AV_LOG_INFO,
 456                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 457                "specified vbv buffer is too large for the given bitrate!\n");
 458     }
 459
 460     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
 461         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 462         s->codec_id != AV_CODEC_ID_FLV1) {
 463         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 464         return -1;
 465     }
 466
 467     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 468         av_log(avctx, AV_LOG_ERROR,
 469                "OBMC is only supported with simple mb decision\n");
 470         return -1;
 471     }
 472
 473     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 474         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 475         return -1;
 476     }
 477
 478     if (s->max_b_frames                    &&
 479         s->codec_id != AV_CODEC_ID_MPEG4      &&
 480         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 481         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 482         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 483         return -1;
 484     }
 485     if (s->max_b_frames < 0) {
 486         av_log(avctx, AV_LOG_ERROR,
 487                "max b frames must be 0 or positive for mpegvideo based encoders\n");
 488         return -1;
 489     }
 490
 491     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 492          s->codec_id == AV_CODEC_ID_H263  ||
 493          s->codec_id == AV_CODEC_ID_H263P) &&
 494         (avctx->sample_aspect_ratio.num > 255 ||
 495          avctx->sample_aspect_ratio.den > 255)) {
 496         av_log(avctx, AV_LOG_WARNING,
 497                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
 498                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 499         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
 500                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
 501     }
 502
 503     if ((s->codec_id == AV_CODEC_ID_H263  ||
 504          s->codec_id == AV_CODEC_ID_H263P) &&
 505         (avctx->width  > 2048 ||
 506          avctx->height > 1152 )) {
 507         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
 508         return -1;
 509     }
 510     if ((s->codec_id == AV_CODEC_ID_H263  ||
 511          s->codec_id == AV_CODEC_ID_H263P) &&
 512         ((avctx->width &3) ||
 513          (avctx->height&3) )) {
 514         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
 515         return -1;
 516     }
 517
 518     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
 519         (avctx->width  > 4095 ||
 520          avctx->height > 4095 )) {
 521         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
 522         return -1;
 523     }
 524
 525     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
 526         (avctx->width  > 16383 ||
 527          avctx->height > 16383 )) {
 528         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
 529         return -1;
 530     }
 531
 532     if (s->codec_id == AV_CODEC_ID_RV10 &&
 533         (avctx->width &15 ||
 534          avctx->height&15 )) {
 535         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
 536         return AVERROR(EINVAL);
 537     }
 538
 539     if (s->codec_id == AV_CODEC_ID_RV20 &&
 540         (avctx->width &3 ||
 541          avctx->height&3 )) {
 542         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
 543         return AVERROR(EINVAL);
 544     }
 545
 546     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
 547          s->codec_id == AV_CODEC_ID_WMV2) &&
 548          avctx->width & 1) {
 549          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
 550          return -1;
 551     }
 552
 553     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 554         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 555         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 556         return -1;
 557     }
 558
 559     // FIXME mpeg2 uses that too
 560     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
 561                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
 562         av_log(avctx, AV_LOG_ERROR,
 563                "mpeg2 style quantization not supported by codec\n");
 564         return -1;
 565     }
 566
 567     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 568         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 569         return -1;
 570     }
 571
 572     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 573         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 574         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 575         return -1;
 576     }
 577
 578     if (s->avctx->scenechange_threshold < 1000000000 &&
 579         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
 580         av_log(avctx, AV_LOG_ERROR,
 581                "closed gop with scene change detection are not supported yet, "
 582                "set threshold to 1000000000\n");
 583         return -1;
 584     }
 585
 586     if (s->flags & CODEC_FLAG_LOW_DELAY) {
 587         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 588             av_log(avctx, AV_LOG_ERROR,
 589                   "low delay forcing is only available for mpeg2\n");
 590             return -1;
 591         }
 592         if (s->max_b_frames != 0) {
 593             av_log(avctx, AV_LOG_ERROR,
 594                    "b frames cannot be used with low delay\n");
 595             return -1;
 596         }
 597     }
 598
 599     if (s->q_scale_type == 1) {
 600         if (avctx->qmax > 12) {
 601             av_log(avctx, AV_LOG_ERROR,
 602                    "non linear quant only supports qmax <= 12 currently\n");
 603             return -1;
 604         }
 605     }
 606
 607     if (s->avctx->thread_count > 1         &&
 608         s->codec_id != AV_CODEC_ID_MPEG4      &&
 609         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 610         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 611         s->codec_id != AV_CODEC_ID_MJPEG      &&
 612         (s->codec_id != AV_CODEC_ID_H263P)) {
 613         av_log(avctx, AV_LOG_ERROR,
 614                "multi threaded encoding not supported by codec\n");
 615         return -1;
 616     }
 617
 618     if (s->avctx->thread_count < 1) {
 619         av_log(avctx, AV_LOG_ERROR,
 620                "automatic thread number detection not supported by codec, "
 621                "patch welcome\n");
 622         return -1;
 623     }
 624
 625     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
 626         s->rtp_mode = 1;
 627
 628     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
 629         s->h263_slice_structured = 1;
 630
 631     if (!avctx->time_base.den || !avctx->time_base.num) {
 632         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 633         return -1;
 634     }
 635
 636     i = (INT_MAX / 2 + 128) >> 8;
 637     if (avctx->mb_threshold >= i) {
 638         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
 639                i - 1);
 640         return -1;
 641     }
 642
 643     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 644         av_log(avctx, AV_LOG_INFO,
 645                "notice: b_frame_strategy only affects the first pass\n");
 646         avctx->b_frame_strategy = 0;
 647     }
 648
 649     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 650     if (i > 1) {
 651         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 652         avctx->time_base.den /= i;
 653         avctx->time_base.num /= i;
 654         //return -1;
 655     }
 656
 657     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
 658         // (a + x * 3 / 8) / x
 659         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 660         s->inter_quant_bias = 0;
 661     } else {
 662         s->intra_quant_bias = 0;
 663         // (a - x / 4) / x
 664         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 665     }
 666
 667     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
 668         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
 669         return AVERROR(EINVAL);
 670     }
 671
 672     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 673         s->intra_quant_bias = avctx->intra_quant_bias;
 674     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 675         s->inter_quant_bias = avctx->inter_quant_bias;
 676
 677     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
 678
 679     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 680         s->avctx->time_base.den > (1 << 16) - 1) {
 681         av_log(avctx, AV_LOG_ERROR,
 682                "timebase %d/%d not supported by MPEG 4 standard, "
 683                "the maximum admitted value for the timebase denominator "
 684                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 685                (1 << 16) - 1);
 686         return -1;
 687     }
 688     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 689
 690     switch (avctx->codec->id) {
 691     case AV_CODEC_ID_MPEG1VIDEO:
 692         s->out_format = FMT_MPEG1;
 693         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 694         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 695         break;
 696     case AV_CODEC_ID_MPEG2VIDEO:
 697         s->out_format = FMT_MPEG1;
 698         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 699         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 700         s->rtp_mode   = 1;
 701         break;
 702     case AV_CODEC_ID_MJPEG:
 703     case AV_CODEC_ID_AMV:
 704         s->out_format = FMT_MJPEG;
 705         s->intra_only = 1; /* force intra only for jpeg */
 706         if (!CONFIG_MJPEG_ENCODER ||
 707             ff_mjpeg_encode_init(s) < 0)
 708             return -1;
 709         avctx->delay = 0;
 710         s->low_delay = 1;
 711         break;
 712     case AV_CODEC_ID_H261:
 713         if (!CONFIG_H261_ENCODER)
 714             return -1;
 715         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 716             av_log(avctx, AV_LOG_ERROR,
 717                    "The specified picture size of %dx%d is not valid for the "
 718                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 719                     s->width, s->height);
 720             return -1;
 721         }
 722         s->out_format = FMT_H261;
 723         avctx->delay  = 0;
 724         s->low_delay  = 1;
 725         break;
 726     case AV_CODEC_ID_H263:
 727         if (!CONFIG_H263_ENCODER)
 728             return -1;
 729         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 730                              s->width, s->height) == 8) {
 731             av_log(avctx, AV_LOG_ERROR,
 732                    "The specified picture size of %dx%d is not valid for "
 733                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 734                    "352x288, 704x576, and 1408x1152. "
 735                    "Try H.263+.\n", s->width, s->height);
 736             return -1;
 737         }
 738         s->out_format = FMT_H263;
 739         avctx->delay  = 0;
 740         s->low_delay  = 1;
 741         break;
 742     case AV_CODEC_ID_H263P:
 743         s->out_format = FMT_H263;
 744         s->h263_plus  = 1;
 745         /* Fx */
 746         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 747         s->modified_quant  = s->h263_aic;
 748         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 749         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 750
 751         /* /Fx */
 752         /* These are just to be sure */
 753         avctx->delay = 0;
 754         s->low_delay = 1;
 755         break;
 756     case AV_CODEC_ID_FLV1:
 757         s->out_format      = FMT_H263;
 758         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 759         s->unrestricted_mv = 1;
 760         s->rtp_mode  = 0; /* don't allow GOB */
 761         avctx->delay = 0;
 762         s->low_delay = 1;
 763         break;
 764     case AV_CODEC_ID_RV10:
 765         s->out_format = FMT_H263;
 766         avctx->delay  = 0;
 767         s->low_delay  = 1;
 768         break;
 769     case AV_CODEC_ID_RV20:
 770         s->out_format      = FMT_H263;
 771         avctx->delay       = 0;
 772         s->low_delay       = 1;
 773         s->modified_quant  = 1;
 774         s->h263_aic        = 1;
 775         s->h263_plus       = 1;
 776         s->loop_filter     = 1;
 777         s->unrestricted_mv = 0;
 778         break;
 779     case AV_CODEC_ID_MPEG4:
 780         s->out_format      = FMT_H263;
 781         s->h263_pred       = 1;
 782         s->unrestricted_mv = 1;
 783         s->low_delay       = s->max_b_frames ? 0 : 1;
 784         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 785         break;
 786     case AV_CODEC_ID_MSMPEG4V2:
 787         s->out_format      = FMT_H263;
 788         s->h263_pred       = 1;
 789         s->unrestricted_mv = 1;
 790         s->msmpeg4_version = 2;
 791         avctx->delay       = 0;
 792         s->low_delay       = 1;
 793         break;
 794     case AV_CODEC_ID_MSMPEG4V3:
 795         s->out_format        = FMT_H263;
 796         s->h263_pred         = 1;
 797         s->unrestricted_mv   = 1;
 798         s->msmpeg4_version   = 3;
 799         s->flipflop_rounding = 1;
 800         avctx->delay         = 0;
 801         s->low_delay         = 1;
 802         break;
 803     case AV_CODEC_ID_WMV1:
 804         s->out_format        = FMT_H263;
 805         s->h263_pred         = 1;
 806         s->unrestricted_mv   = 1;
 807         s->msmpeg4_version   = 4;
 808         s->flipflop_rounding = 1;
 809         avctx->delay         = 0;
 810         s->low_delay         = 1;
 811         break;
 812     case AV_CODEC_ID_WMV2:
 813         s->out_format        = FMT_H263;
 814         s->h263_pred         = 1;
 815         s->unrestricted_mv   = 1;
 816         s->msmpeg4_version   = 5;
 817         s->flipflop_rounding = 1;
 818         avctx->delay         = 0;
 819         s->low_delay         = 1;
 820         break;
 821     default:
 822         return -1;
 823     }
 824
 825     avctx->has_b_frames = !s->low_delay;
 826
 827     s->encoding = 1;
 828
 829     s->progressive_frame    =
 830     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 831                                                 CODEC_FLAG_INTERLACED_ME) ||
 832                                 s->alternate_scan);
 833
 834     /* init */
 835     if (ff_MPV_common_init(s) < 0)
 836         return -1;
 837
 838     ff_fdctdsp_init(&s->fdsp, avctx);
 839     ff_me_cmp_init(&s->mecc, avctx);
 840     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 841     ff_pixblockdsp_init(&s->pdsp, avctx);
 842     ff_qpeldsp_init(&s->qdsp);
 843
 844     s->avctx->coded_frame = s->current_picture.f;
 845
 846     if (s->msmpeg4_version) {
 847         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 848                           2 * 2 * (MAX_LEVEL + 1) *
 849                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 850     }
 851     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 852
 853     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 854     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
 855     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 856     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 857     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 858     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 859     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 860                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 861     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 862                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 863
 864     if (s->avctx->noise_reduction) {
 865         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 866                           2 * 64 * sizeof(uint16_t), fail);
 867     }
 868
 869     ff_dct_encode_init(s);
 870
 871     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 872         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 873
 874     s->quant_precision = 5;
 875
 876     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
 877     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
 878
 879     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 880         ff_h261_encode_init(s);
 881     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 882         ff_h263_encode_init(s);
 883     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 884         ff_msmpeg4_encode_init(s);
 885     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 886         && s->out_format == FMT_MPEG1)
 887         ff_mpeg1_encode_init(s);
 888
 889     /* init q matrix */
 890     for (i = 0; i < 64; i++) {
 891         int j = s->idsp.idct_permutation[i];
 892         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 893             s->mpeg_quant) {
 894             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 895             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 896         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 897             s->intra_matrix[j] =
 898             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 899         } else {
 900             /* mpeg1/2 */
 901             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 902             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 903         }
 904         if (s->avctx->intra_matrix)
 905             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 906         if (s->avctx->inter_matrix)
 907             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 908     }
 909
 910     /* precompute matrix */
 911     /* for mjpeg, we do include qscale in the matrix */
 912     if (s->out_format != FMT_MJPEG) {
 913         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
 914                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 915                           31, 1);
 916         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
 917                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 918                           31, 0);
 919     }
 920
 921     if (ff_rate_control_init(s) < 0)
 922         return -1;
 923
 924 #if FF_API_ERROR_RATE
 925     FF_DISABLE_DEPRECATION_WARNINGS
 926     if (avctx->error_rate)
 927         s->error_rate = avctx->error_rate;
 928     FF_ENABLE_DEPRECATION_WARNINGS;
 929 #endif
 930
 931 #if FF_API_NORMALIZE_AQP
 932     FF_DISABLE_DEPRECATION_WARNINGS
 933     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 934         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 935     FF_ENABLE_DEPRECATION_WARNINGS;
 936 #endif
 937
 938 #if FF_API_MV0
 939     FF_DISABLE_DEPRECATION_WARNINGS
 940     if (avctx->flags & CODEC_FLAG_MV0)
 941         s->mpv_flags |= FF_MPV_FLAG_MV0;
 942     FF_ENABLE_DEPRECATION_WARNINGS
 943 #endif
 944
 945     if (avctx->b_frame_strategy == 2) {
 946         for (i = 0; i < s->max_b_frames + 2; i++) {
 947             s->tmp_frames[i] = av_frame_alloc();
 948             if (!s->tmp_frames[i])
 949                 return AVERROR(ENOMEM);
 950
 951             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 952             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
 953             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
 954
 955             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 956             if (ret < 0)
 957                 return ret;
 958         }
 959     }
 960
 961     return 0;
 962 fail:
 963     ff_MPV_encode_end(avctx);
 964     return AVERROR_UNKNOWN;
 965 }
 966
 967 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
 968 {
 969     MpegEncContext *s = avctx->priv_data;
 970     int i;
 971
 972     ff_rate_control_uninit(s);
 973
 974     ff_MPV_common_end(s);
 975     if (CONFIG_MJPEG_ENCODER &&
 976         s->out_format == FMT_MJPEG)
 977         ff_mjpeg_encode_close(s);
 978
 979     av_freep(&avctx->extradata);
 980
 981     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
 982         av_frame_free(&s->tmp_frames[i]);
 983
 984     ff_free_picture_tables(&s->new_picture);
 985     ff_mpeg_unref_picture(s, &s->new_picture);
 986
 987     av_freep(&s->avctx->stats_out);
 988     av_freep(&s->ac_stats);
 989
 990     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
 991     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
 992     s->q_chroma_intra_matrix=   NULL;
 993     s->q_chroma_intra_matrix16= NULL;
 994     av_freep(&s->q_intra_matrix);
 995     av_freep(&s->q_inter_matrix);
 996     av_freep(&s->q_intra_matrix16);
 997     av_freep(&s->q_inter_matrix16);
 998     av_freep(&s->input_picture);
 999     av_freep(&s->reordered_input_picture);
1000     av_freep(&s->dct_offset);
1001
1002     return 0;
1003 }
1004
/**
 * Sum of absolute differences between a 16x16 block and a constant.
 *
 * @param src    top-left sample of the 16x16 block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int row, col;
    int sum = 0;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++)
            sum += FFABS(line[col] - ref);
    }

    return sum;
}
1018
1019 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1020                            uint8_t *ref, int stride)
1021 {
1022     int x, y, w, h;
1023     int acc = 0;
1024
1025     w = s->width  & ~15;
1026     h = s->height & ~15;
1027
1028     for (y = 0; y < h; y += 16) {
1029         for (x = 0; x < w; x += 16) {
1030             int offset = x + y * stride;
1031             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1032                                       stride, 16);
1033             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1034             int sae  = get_sae(src + offset, mean, stride);
1035
1036             acc += sae + 500 < sad;
1037         }
1038     }
1039     return acc;
1040 }
1041
1042
1043 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1044 {
1045     Picture *pic = NULL;
1046     int64_t pts;
1047     int i, display_picture_number = 0, ret;
1048     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1049                                                  (s->low_delay ? 0 : 1);
1050     int direct = 1;
1051
1052     if (pic_arg) {
1053         pts = pic_arg->pts;
1054         display_picture_number = s->input_picture_number++;
1055
1056         if (pts != AV_NOPTS_VALUE) {
1057             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1058                 int64_t last = s->user_specified_pts;
1059
1060                 if (pts <= last) {
1061                     av_log(s->avctx, AV_LOG_ERROR,
1062                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1063                            pts, last);
1064                     return AVERROR(EINVAL);
1065                 }
1066
1067                 if (!s->low_delay && display_picture_number == 1)
1068                     s->dts_delta = pts - last;
1069             }
1070             s->user_specified_pts = pts;
1071         } else {
1072             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1073                 s->user_specified_pts =
1074                 pts = s->user_specified_pts + 1;
1075                 av_log(s->avctx, AV_LOG_INFO,
1076                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1077                        pts);
1078             } else {
1079                 pts = display_picture_number;
1080             }
1081         }
1082     }
1083
1084     if (pic_arg) {
1085         if (!pic_arg->buf[0])
1086             direct = 0;
1087         if (pic_arg->linesize[0] != s->linesize)
1088             direct = 0;
1089         if (pic_arg->linesize[1] != s->uvlinesize)
1090             direct = 0;
1091         if (pic_arg->linesize[2] != s->uvlinesize)
1092             direct = 0;
1093         if ((s->width & 15) || (s->height & 15))
1094             direct = 0;
1095         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1096             direct = 0;
1097         if (s->linesize & (STRIDE_ALIGN-1))
1098             direct = 0;
1099
1100         av_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1101                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1102
1103         if (direct) {
1104             i = ff_find_unused_picture(s, 1);
1105             if (i < 0)
1106                 return i;
1107
1108             pic = &s->picture[i];
1109             pic->reference = 3;
1110
1111             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1112                 return ret;
1113             if (ff_alloc_picture(s, pic, 1) < 0) {
1114                 return -1;
1115             }
1116         } else {
1117             i = ff_find_unused_picture(s, 0);
1118             if (i < 0)
1119                 return i;
1120
1121             pic = &s->picture[i];
1122             pic->reference = 3;
1123
1124             if (ff_alloc_picture(s, pic, 0) < 0) {
1125                 return -1;
1126             }
1127
1128             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1129                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1130                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1131                 // empty
1132             } else {
1133                 int h_chroma_shift, v_chroma_shift;
1134                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1135                                                  &h_chroma_shift,
1136                                                  &v_chroma_shift);
1137
1138                 for (i = 0; i < 3; i++) {
1139                     int src_stride = pic_arg->linesize[i];
1140                     int dst_stride = i ? s->uvlinesize : s->linesize;
1141                     int h_shift = i ? h_chroma_shift : 0;
1142                     int v_shift = i ? v_chroma_shift : 0;
1143                     int w = s->width  >> h_shift;
1144                     int h = s->height >> v_shift;
1145                     uint8_t *src = pic_arg->data[i];
1146                     uint8_t *dst = pic->f->data[i];
1147                     int vpad = 16;
1148
1149                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1150                         && !s->progressive_sequence
1151                         && FFALIGN(s->height, 32) - s->height > 16)
1152                         vpad = 32;
1153
1154                     if (!s->avctx->rc_buffer_size)
1155                         dst += INPLACE_OFFSET;
1156
1157                     if (src_stride == dst_stride)
1158                         memcpy(dst, src, src_stride * h);
1159                     else {
1160                         int h2 = h;
1161                         uint8_t *dst2 = dst;
1162                         while (h2--) {
1163                             memcpy(dst2, src, w);
1164                             dst2 += dst_stride;
1165                             src += src_stride;
1166                         }
1167                     }
1168                     if ((s->width & 15) || (s->height & (vpad-1))) {
1169                         s->mpvencdsp.draw_edges(dst, dst_stride,
1170                                                 w, h,
1171                                                 16>>h_shift,
1172                                                 vpad>>v_shift,
1173                                                 EDGE_BOTTOM);
1174                     }
1175                 }
1176             }
1177         }
1178         ret = av_frame_copy_props(pic->f, pic_arg);
1179         if (ret < 0)
1180             return ret;
1181
1182         pic->f->display_picture_number = display_picture_number;
1183         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1184     }
1185
1186     /* shift buffer entries */
1187     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1188         s->input_picture[i - 1] = s->input_picture[i];
1189
1190     s->input_picture[encoding_delay] = (Picture*) pic;
1191
1192     return 0;
1193 }
1194
1195 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1196 {
1197     int x, y, plane;
1198     int score = 0;
1199     int64_t score64 = 0;
1200
1201     for (plane = 0; plane < 3; plane++) {
1202         const int stride = p->f->linesize[plane];
1203         const int bw = plane ? 1 : 2;
1204         for (y = 0; y < s->mb_height * bw; y++) {
1205             for (x = 0; x < s->mb_width * bw; x++) {
1206                 int off = p->shared ? 0 : 16;
1207                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1208                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1209                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1210
1211                 switch (FFABS(s->avctx->frame_skip_exp)) {
1212                 case 0: score    =  FFMAX(score, v);          break;
1213                 case 1: score   += FFABS(v);                  break;
1214                 case 2: score64 += v * (int64_t)v;                       break;
1215                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1216                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1217                 }
1218             }
1219         }
1220     }
1221     emms_c();
1222
1223     if (score)
1224         score64 = score;
1225     if (s->avctx->frame_skip_exp < 0)
1226         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1227                       -1.0/s->avctx->frame_skip_exp);
1228
1229     if (score64 < s->avctx->frame_skip_threshold)
1230         return 1;
1231     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1232         return 1;
1233     return 0;
1234 }
1235
1236 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1237 {
1238     AVPacket pkt = { 0 };
1239     int ret, got_output;
1240
1241     av_init_packet(&pkt);
1242     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1243     if (ret < 0)
1244         return ret;
1245
1246     ret = pkt.size;
1247     av_free_packet(&pkt);
1248     return ret;
1249 }
1250
1251 static int estimate_best_b_count(MpegEncContext *s)
1252 {
1253     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1254     AVCodecContext *c = avcodec_alloc_context3(NULL);
1255     const int scale = s->avctx->brd_scale;
1256     int i, j, out_size, p_lambda, b_lambda, lambda2;
1257     int64_t best_rd  = INT64_MAX;
1258     int best_b_count = -1;
1259
1260     av_assert0(scale >= 0 && scale <= 3);
1261
1262     //emms_c();
1263     //s->next_picture_ptr->quality;
1264     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1265     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1266     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1267     if (!b_lambda) // FIXME we should do this somewhere else
1268         b_lambda = p_lambda;
1269     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1270                FF_LAMBDA_SHIFT;
1271
1272     c->width        = s->width  >> scale;
1273     c->height       = s->height >> scale;
1274     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1275     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1276     c->mb_decision  = s->avctx->mb_decision;
1277     c->me_cmp       = s->avctx->me_cmp;
1278     c->mb_cmp       = s->avctx->mb_cmp;
1279     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1280     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1281     c->time_base    = s->avctx->time_base;
1282     c->max_b_frames = s->max_b_frames;
1283
1284     if (avcodec_open2(c, codec, NULL) < 0)
1285         return -1;
1286
1287     for (i = 0; i < s->max_b_frames + 2; i++) {
1288         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1289                                                 s->next_picture_ptr;
1290         uint8_t *data[4];
1291
1292         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1293             pre_input = *pre_input_ptr;
1294             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1295
1296             if (!pre_input.shared && i) {
1297                 data[0] += INPLACE_OFFSET;
1298                 data[1] += INPLACE_OFFSET;
1299                 data[2] += INPLACE_OFFSET;
1300             }
1301
1302             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1303                                        s->tmp_frames[i]->linesize[0],
1304                                        data[0],
1305                                        pre_input.f->linesize[0],
1306                                        c->width, c->height);
1307             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1308                                        s->tmp_frames[i]->linesize[1],
1309                                        data[1],
1310                                        pre_input.f->linesize[1],
1311                                        c->width >> 1, c->height >> 1);
1312             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1313                                        s->tmp_frames[i]->linesize[2],
1314                                        data[2],
1315                                        pre_input.f->linesize[2],
1316                                        c->width >> 1, c->height >> 1);
1317         }
1318     }
1319
1320     for (j = 0; j < s->max_b_frames + 1; j++) {
1321         int64_t rd = 0;
1322
1323         if (!s->input_picture[j])
1324             break;
1325
1326         c->error[0] = c->error[1] = c->error[2] = 0;
1327
1328         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1329         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1330
1331         out_size = encode_frame(c, s->tmp_frames[0]);
1332
1333         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1334
1335         for (i = 0; i < s->max_b_frames + 1; i++) {
1336             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1337
1338             s->tmp_frames[i + 1]->pict_type = is_p ?
1339                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1340             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1341
1342             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1343
1344             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1345         }
1346
1347         /* get the delayed frames */
1348         while (out_size) {
1349             out_size = encode_frame(c, NULL);
1350             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1351         }
1352
1353         rd += c->error[0] + c->error[1] + c->error[2];
1354
1355         if (rd < best_rd) {
1356             best_rd = rd;
1357             best_b_count = j;
1358         }
1359     }
1360
1361     avcodec_close(c);
1362     av_freep(&c);
1363
1364     return best_b_count;
1365 }
1366
1367 static int select_input_picture(MpegEncContext *s)
1368 {
1369     int i, ret;
1370
1371     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1372         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1373     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1374
1375     /* set next picture type & ordering */
1376     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1377         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1378             if (s->picture_in_gop_number < s->gop_size &&
1379                 s->next_picture_ptr &&
1380                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1381                 // FIXME check that te gop check above is +-1 correct
1382                 av_frame_unref(s->input_picture[0]->f);
1383
1384                 ff_vbv_update(s, 0);
1385
1386                 goto no_output_pic;
1387             }
1388         }
1389
1390         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1391             s->next_picture_ptr == NULL || s->intra_only) {
1392             s->reordered_input_picture[0] = s->input_picture[0];
1393             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1394             s->reordered_input_picture[0]->f->coded_picture_number =
1395                 s->coded_picture_number++;
1396         } else {
1397             int b_frames;
1398
1399             if (s->flags & CODEC_FLAG_PASS2) {
1400                 for (i = 0; i < s->max_b_frames + 1; i++) {
1401                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1402
1403                     if (pict_num >= s->rc_context.num_entries)
1404                         break;
1405                     if (!s->input_picture[i]) {
1406                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1407                         break;
1408                     }
1409
1410                     s->input_picture[i]->f->pict_type =
1411                         s->rc_context.entry[pict_num].new_pict_type;
1412                 }
1413             }
1414
1415             if (s->avctx->b_frame_strategy == 0) {
1416                 b_frames = s->max_b_frames;
1417                 while (b_frames && !s->input_picture[b_frames])
1418                     b_frames--;
1419             } else if (s->avctx->b_frame_strategy == 1) {
1420                 for (i = 1; i < s->max_b_frames + 1; i++) {
1421                     if (s->input_picture[i] &&
1422                         s->input_picture[i]->b_frame_score == 0) {
1423                         s->input_picture[i]->b_frame_score =
1424                             get_intra_count(s,
1425                                             s->input_picture[i    ]->f->data[0],
1426                                             s->input_picture[i - 1]->f->data[0],
1427                                             s->linesize) + 1;
1428                     }
1429                 }
1430                 for (i = 0; i < s->max_b_frames + 1; i++) {
1431                     if (s->input_picture[i] == NULL ||
1432                         s->input_picture[i]->b_frame_score - 1 >
1433                             s->mb_num / s->avctx->b_sensitivity)
1434                         break;
1435                 }
1436
1437                 b_frames = FFMAX(0, i - 1);
1438
1439                 /* reset scores */
1440                 for (i = 0; i < b_frames + 1; i++) {
1441                     s->input_picture[i]->b_frame_score = 0;
1442                 }
1443             } else if (s->avctx->b_frame_strategy == 2) {
1444                 b_frames = estimate_best_b_count(s);
1445             } else {
1446                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1447                 b_frames = 0;
1448             }
1449
1450             emms_c();
1451
1452             for (i = b_frames - 1; i >= 0; i--) {
1453                 int type = s->input_picture[i]->f->pict_type;
1454                 if (type && type != AV_PICTURE_TYPE_B)
1455                     b_frames = i;
1456             }
1457             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1458                 b_frames == s->max_b_frames) {
1459                 av_log(s->avctx, AV_LOG_ERROR,
1460                        "warning, too many b frames in a row\n");
1461             }
1462
1463             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1464                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1465                     s->gop_size > s->picture_in_gop_number) {
1466                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1467                 } else {
1468                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1469                         b_frames = 0;
1470                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1471                 }
1472             }
1473
1474             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1475                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1476                 b_frames--;
1477
1478             s->reordered_input_picture[0] = s->input_picture[b_frames];
1479             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1480                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1481             s->reordered_input_picture[0]->f->coded_picture_number =
1482                 s->coded_picture_number++;
1483             for (i = 0; i < b_frames; i++) {
1484                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1485                 s->reordered_input_picture[i + 1]->f->pict_type =
1486                     AV_PICTURE_TYPE_B;
1487                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1488                     s->coded_picture_number++;
1489             }
1490         }
1491     }
1492 no_output_pic:
1493     if (s->reordered_input_picture[0]) {
1494         s->reordered_input_picture[0]->reference =
1495            s->reordered_input_picture[0]->f->pict_type !=
1496                AV_PICTURE_TYPE_B ? 3 : 0;
1497
1498         ff_mpeg_unref_picture(s, &s->new_picture);
1499         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1500             return ret;
1501
1502         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1503             // input is a shared pix, so we can't modifiy it -> alloc a new
1504             // one & ensure that the shared one is reuseable
1505
1506             Picture *pic;
1507             int i = ff_find_unused_picture(s, 0);
1508             if (i < 0)
1509                 return i;
1510             pic = &s->picture[i];
1511
1512             pic->reference = s->reordered_input_picture[0]->reference;
1513             if (ff_alloc_picture(s, pic, 0) < 0) {
1514                 return -1;
1515             }
1516
1517             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1518             if (ret < 0)
1519                 return ret;
1520
1521             /* mark us unused / free shared pic */
1522             av_frame_unref(s->reordered_input_picture[0]->f);
1523             s->reordered_input_picture[0]->shared = 0;
1524
1525             s->current_picture_ptr = pic;
1526         } else {
1527             // input is not a shared pix -> reuse buffer for current_pix
1528             s->current_picture_ptr = s->reordered_input_picture[0];
1529             for (i = 0; i < 4; i++) {
1530                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1531             }
1532         }
1533         ff_mpeg_unref_picture(s, &s->current_picture);
1534         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1535                                        s->current_picture_ptr)) < 0)
1536             return ret;
1537
1538         s->picture_number = s->new_picture.f->display_picture_number;
1539     } else {
1540         ff_mpeg_unref_picture(s, &s->new_picture);
1541     }
1542     return 0;
1543 }
1544
1545 static void frame_end(MpegEncContext *s)
1546 {
1547     if (s->unrestricted_mv &&
1548         s->current_picture.reference &&
1549         !s->intra_only) {
1550         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1551         int hshift = desc->log2_chroma_w;
1552         int vshift = desc->log2_chroma_h;
1553         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1554                                 s->current_picture.f->linesize[0],
1555                                 s->h_edge_pos, s->v_edge_pos,
1556                                 EDGE_WIDTH, EDGE_WIDTH,
1557                                 EDGE_TOP | EDGE_BOTTOM);
1558         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1559                                 s->current_picture.f->linesize[1],
1560                                 s->h_edge_pos >> hshift,
1561                                 s->v_edge_pos >> vshift,
1562                                 EDGE_WIDTH >> hshift,
1563                                 EDGE_WIDTH >> vshift,
1564                                 EDGE_TOP | EDGE_BOTTOM);
1565         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1566                                 s->current_picture.f->linesize[2],
1567                                 s->h_edge_pos >> hshift,
1568                                 s->v_edge_pos >> vshift,
1569                                 EDGE_WIDTH >> hshift,
1570                                 EDGE_WIDTH >> vshift,
1571                                 EDGE_TOP | EDGE_BOTTOM);
1572     }
1573
1574     emms_c();
1575
1576     s->last_pict_type                 = s->pict_type;
1577     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1578     if (s->pict_type!= AV_PICTURE_TYPE_B)
1579         s->last_non_b_pict_type = s->pict_type;
1580
1581     s->avctx->coded_frame = s->current_picture_ptr->f;
1582
1583 }
1584
1585 static void update_noise_reduction(MpegEncContext *s)
1586 {
1587     int intra, i;
1588
1589     for (intra = 0; intra < 2; intra++) {
1590         if (s->dct_count[intra] > (1 << 16)) {
1591             for (i = 0; i < 64; i++) {
1592                 s->dct_error_sum[intra][i] >>= 1;
1593             }
1594             s->dct_count[intra] >>= 1;
1595         }
1596
1597         for (i = 0; i < 64; i++) {
1598             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1599                                        s->dct_count[intra] +
1600                                        s->dct_error_sum[intra][i] / 2) /
1601                                       (s->dct_error_sum[intra][i] + 1);
1602         }
1603     }
1604 }
1605
1606 static int frame_start(MpegEncContext *s)
1607 {
1608     int ret;
1609
1610     /* mark & release old frames */
1611     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1612         s->last_picture_ptr != s->next_picture_ptr &&
1613         s->last_picture_ptr->f->buf[0]) {
1614         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1615     }
1616
1617     s->current_picture_ptr->f->pict_type = s->pict_type;
1618     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1619
1620     ff_mpeg_unref_picture(s, &s->current_picture);
1621     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1622                                    s->current_picture_ptr)) < 0)
1623         return ret;
1624
1625     if (s->pict_type != AV_PICTURE_TYPE_B) {
1626         s->last_picture_ptr = s->next_picture_ptr;
1627         if (!s->droppable)
1628             s->next_picture_ptr = s->current_picture_ptr;
1629     }
1630
1631     if (s->last_picture_ptr) {
1632         ff_mpeg_unref_picture(s, &s->last_picture);
1633         if (s->last_picture_ptr->f->buf[0] &&
1634             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1635                                        s->last_picture_ptr)) < 0)
1636             return ret;
1637     }
1638     if (s->next_picture_ptr) {
1639         ff_mpeg_unref_picture(s, &s->next_picture);
1640         if (s->next_picture_ptr->f->buf[0] &&
1641             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1642                                        s->next_picture_ptr)) < 0)
1643             return ret;
1644     }
1645
1646     if (s->picture_structure!= PICT_FRAME) {
1647         int i;
1648         for (i = 0; i < 4; i++) {
1649             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1650                 s->current_picture.f->data[i] +=
1651                     s->current_picture.f->linesize[i];
1652             }
1653             s->current_picture.f->linesize[i] *= 2;
1654             s->last_picture.f->linesize[i]    *= 2;
1655             s->next_picture.f->linesize[i]    *= 2;
1656         }
1657     }
1658
1659     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1660         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1661         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1662     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1663         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1664         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1665     } else {
1666         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1667         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1668     }
1669
1670     if (s->dct_error_sum) {
1671         av_assert2(s->avctx->noise_reduction && s->encoding);
1672         update_noise_reduction(s);
1673     }
1674
1675     return 0;
1676 }
1677
1678 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1679                           const AVFrame *pic_arg, int *got_packet)
1680 {
1681     MpegEncContext *s = avctx->priv_data;
1682     int i, stuffing_count, ret;
1683     int context_count = s->slice_context_count;
1684
1685     s->picture_in_gop_number++;
1686
1687     if (load_input_picture(s, pic_arg) < 0)
1688         return -1;
1689
1690     if (select_input_picture(s) < 0) {
1691         return -1;
1692     }
1693
1694     /* output? */
1695     if (s->new_picture.f->data[0]) {
1696         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1697             return ret;
1698         if (s->mb_info) {
1699             s->mb_info_ptr = av_packet_new_side_data(pkt,
1700                                  AV_PKT_DATA_H263_MB_INFO,
1701                                  s->mb_width*s->mb_height*12);
1702             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1703         }
1704
1705         for (i = 0; i < context_count; i++) {
1706             int start_y = s->thread_context[i]->start_mb_y;
1707             int   end_y = s->thread_context[i]->  end_mb_y;
1708             int h       = s->mb_height;
1709             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1710             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1711
1712             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1713         }
1714
1715         s->pict_type = s->new_picture.f->pict_type;
1716         //emms_c();
1717         ret = frame_start(s);
1718         if (ret < 0)
1719             return ret;
1720 vbv_retry:
1721         if (encode_picture(s, s->picture_number) < 0)
1722             return -1;
1723
1724         avctx->header_bits = s->header_bits;
1725         avctx->mv_bits     = s->mv_bits;
1726         avctx->misc_bits   = s->misc_bits;
1727         avctx->i_tex_bits  = s->i_tex_bits;
1728         avctx->p_tex_bits  = s->p_tex_bits;
1729         avctx->i_count     = s->i_count;
1730         // FIXME f/b_count in avctx
1731         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1732         avctx->skip_count  = s->skip_count;
1733
1734         frame_end(s);
1735
1736         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1737             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1738
1739         if (avctx->rc_buffer_size) {
1740             RateControlContext *rcc = &s->rc_context;
1741             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1742
1743             if (put_bits_count(&s->pb) > max_size &&
1744                 s->lambda < s->avctx->lmax) {
1745                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1746                                        (s->qscale + 1) / s->qscale);
1747                 if (s->adaptive_quant) {
1748                     int i;
1749                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1750                         s->lambda_table[i] =
1751                             FFMAX(s->lambda_table[i] + 1,
1752                                   s->lambda_table[i] * (s->qscale + 1) /
1753                                   s->qscale);
1754                 }
1755                 s->mb_skipped = 0;        // done in frame_start()
1756                 // done in encode_picture() so we must undo it
1757                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1758                     if (s->flipflop_rounding          ||
1759                         s->codec_id == AV_CODEC_ID_H263P ||
1760                         s->codec_id == AV_CODEC_ID_MPEG4)
1761                         s->no_rounding ^= 1;
1762                 }
1763                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1764                     s->time_base       = s->last_time_base;
1765                     s->last_non_b_time = s->time - s->pp_time;
1766                 }
1767                 for (i = 0; i < context_count; i++) {
1768                     PutBitContext *pb = &s->thread_context[i]->pb;
1769                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1770                 }
1771                 goto vbv_retry;
1772             }
1773
1774             av_assert0(s->avctx->rc_max_rate);
1775         }
1776
1777         if (s->flags & CODEC_FLAG_PASS1)
1778             ff_write_pass1_stats(s);
1779
1780         for (i = 0; i < 4; i++) {
1781             s->current_picture_ptr->f->error[i] =
1782             s->current_picture.f->error[i] =
1783                 s->current_picture.error[i];
1784             avctx->error[i] += s->current_picture_ptr->f->error[i];
1785         }
1786
1787         if (s->flags & CODEC_FLAG_PASS1)
1788             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1789                    avctx->i_tex_bits + avctx->p_tex_bits ==
1790                        put_bits_count(&s->pb));
1791         flush_put_bits(&s->pb);
1792         s->frame_bits  = put_bits_count(&s->pb);
1793
1794         stuffing_count = ff_vbv_update(s, s->frame_bits);
1795         s->stuffing_bits = 8*stuffing_count;
1796         if (stuffing_count) {
1797             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1798                     stuffing_count + 50) {
1799                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1800                 return -1;
1801             }
1802
1803             switch (s->codec_id) {
1804             case AV_CODEC_ID_MPEG1VIDEO:
1805             case AV_CODEC_ID_MPEG2VIDEO:
1806                 while (stuffing_count--) {
1807                     put_bits(&s->pb, 8, 0);
1808                 }
1809             break;
1810             case AV_CODEC_ID_MPEG4:
1811                 put_bits(&s->pb, 16, 0);
1812                 put_bits(&s->pb, 16, 0x1C3);
1813                 stuffing_count -= 4;
1814                 while (stuffing_count--) {
1815                     put_bits(&s->pb, 8, 0xFF);
1816                 }
1817             break;
1818             default:
1819                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1820             }
1821             flush_put_bits(&s->pb);
1822             s->frame_bits  = put_bits_count(&s->pb);
1823         }
1824
1825         /* update mpeg1/2 vbv_delay for CBR */
1826         if (s->avctx->rc_max_rate                          &&
1827             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1828             s->out_format == FMT_MPEG1                     &&
1829             90000LL * (avctx->rc_buffer_size - 1) <=
1830                 s->avctx->rc_max_rate * 0xFFFFLL) {
1831             int vbv_delay, min_delay;
1832             double inbits  = s->avctx->rc_max_rate *
1833                              av_q2d(s->avctx->time_base);
1834             int    minbits = s->frame_bits - 8 *
1835                              (s->vbv_delay_ptr - s->pb.buf - 1);
1836             double bits    = s->rc_context.buffer_index + minbits - inbits;
1837
1838             if (bits < 0)
1839                 av_log(s->avctx, AV_LOG_ERROR,
1840                        "Internal error, negative bits\n");
1841
1842             assert(s->repeat_first_field == 0);
1843
1844             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1845             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1846                         s->avctx->rc_max_rate;
1847
1848             vbv_delay = FFMAX(vbv_delay, min_delay);
1849
1850             av_assert0(vbv_delay < 0xFFFF);
1851
1852             s->vbv_delay_ptr[0] &= 0xF8;
1853             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1854             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1855             s->vbv_delay_ptr[2] &= 0x07;
1856             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1857             avctx->vbv_delay     = vbv_delay * 300;
1858         }
1859         s->total_bits     += s->frame_bits;
1860         avctx->frame_bits  = s->frame_bits;
1861
1862         pkt->pts = s->current_picture.f->pts;
1863         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1864             if (!s->current_picture.f->coded_picture_number)
1865                 pkt->dts = pkt->pts - s->dts_delta;
1866             else
1867                 pkt->dts = s->reordered_pts;
1868             s->reordered_pts = pkt->pts;
1869         } else
1870             pkt->dts = pkt->pts;
1871         if (s->current_picture.f->key_frame)
1872             pkt->flags |= AV_PKT_FLAG_KEY;
1873         if (s->mb_info)
1874             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1875     } else {
1876         s->frame_bits = 0;
1877     }
1878
1879     /* release non-reference frames */
1880     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1881         if (!s->picture[i].reference)
1882             ff_mpeg_unref_picture(s, &s->picture[i]);
1883     }
1884
1885     av_assert1((s->frame_bits & 7) == 0);
1886
1887     pkt->size = s->frame_bits / 8;
1888     *got_packet = !!pkt->size;
1889     return 0;
1890 }
1891
1892 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1893                                                 int n, int threshold)
1894 {
1895     static const char tab[64] = {
1896         3, 2, 2, 1, 1, 1, 1, 1,
1897         1, 1, 1, 1, 1, 1, 1, 1,
1898         1, 1, 1, 1, 1, 1, 1, 1,
1899         0, 0, 0, 0, 0, 0, 0, 0,
1900         0, 0, 0, 0, 0, 0, 0, 0,
1901         0, 0, 0, 0, 0, 0, 0, 0,
1902         0, 0, 0, 0, 0, 0, 0, 0,
1903         0, 0, 0, 0, 0, 0, 0, 0
1904     };
1905     int score = 0;
1906     int run = 0;
1907     int i;
1908     int16_t *block = s->block[n];
1909     const int last_index = s->block_last_index[n];
1910     int skip_dc;
1911
1912     if (threshold < 0) {
1913         skip_dc = 0;
1914         threshold = -threshold;
1915     } else
1916         skip_dc = 1;
1917
1918     /* Are all we could set to zero already zero? */
1919     if (last_index <= skip_dc - 1)
1920         return;
1921
1922     for (i = 0; i <= last_index; i++) {
1923         const int j = s->intra_scantable.permutated[i];
1924         const int level = FFABS(block[j]);
1925         if (level == 1) {
1926             if (skip_dc && i == 0)
1927                 continue;
1928             score += tab[run];
1929             run = 0;
1930         } else if (level > 1) {
1931             return;
1932         } else {
1933             run++;
1934         }
1935     }
1936     if (score >= threshold)
1937         return;
1938     for (i = skip_dc; i <= last_index; i++) {
1939         const int j = s->intra_scantable.permutated[i];
1940         block[j] = 0;
1941     }
1942     if (block[0])
1943         s->block_last_index[n] = 0;
1944     else
1945         s->block_last_index[n] = -1;
1946 }
1947
1948 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1949                                int last_index)
1950 {
1951     int i;
1952     const int maxlevel = s->max_qcoeff;
1953     const int minlevel = s->min_qcoeff;
1954     int overflow = 0;
1955
1956     if (s->mb_intra) {
1957         i = 1; // skip clipping of intra dc
1958     } else
1959         i = 0;
1960
1961     for (; i <= last_index; i++) {
1962         const int j = s->intra_scantable.permutated[i];
1963         int level = block[j];
1964
1965         if (level > maxlevel) {
1966             level = maxlevel;
1967             overflow++;
1968         } else if (level < minlevel) {
1969             level = minlevel;
1970             overflow++;
1971         }
1972
1973         block[j] = level;
1974     }
1975
1976     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1977         av_log(s->avctx, AV_LOG_INFO,
1978                "warning, clipping %d dct coefficients to %d..%d\n",
1979                overflow, minlevel, maxlevel);
1980 }
1981
/**
 * Compute a weight for each pixel of an 8x8 block from the local pixel
 * variation.
 *
 * For every position the (up to) 3x3 neighbourhood, clipped to the 8x8
 * block, is examined. With n samples, sum S and sum of squares Q the
 * weight is 36 * ff_sqrt(n * Q - S * S) / n.
 *
 * @param weight output, 64 entries stored row by row (8 per row)
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two pixel rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);   /* exclusive */

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2); /* exclusive */
            /* number of samples in the clipped neighbourhood */
            const int count = (bottom - top) * (right - left);
            int sum = 0;
            int sqr = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int v = ptr[nx + ny * stride];

                    sum += v;
                    sqr += v * v;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
2005
2006 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2007                                                 int motion_x, int motion_y,
2008                                                 int mb_block_height,
2009                                                 int mb_block_width,
2010                                                 int mb_block_count)
2011 {
2012     int16_t weight[12][64];
2013     int16_t orig[12][64];
2014     const int mb_x = s->mb_x;
2015     const int mb_y = s->mb_y;
2016     int i;
2017     int skip_dct[12];
2018     int dct_offset = s->linesize * 8; // default for progressive frames
2019     int uv_dct_offset = s->uvlinesize * 8;
2020     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2021     ptrdiff_t wrap_y, wrap_c;
2022
2023     for (i = 0; i < mb_block_count; i++)
2024         skip_dct[i] = s->skipdct;
2025
2026     if (s->adaptive_quant) {
2027         const int last_qp = s->qscale;
2028         const int mb_xy = mb_x + mb_y * s->mb_stride;
2029
2030         s->lambda = s->lambda_table[mb_xy];
2031         update_qscale(s);
2032
2033         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2034             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2035             s->dquant = s->qscale - last_qp;
2036
2037             if (s->out_format == FMT_H263) {
2038                 s->dquant = av_clip(s->dquant, -2, 2);
2039
2040                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2041                     if (!s->mb_intra) {
2042                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2043                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2044                                 s->dquant = 0;
2045                         }
2046                         if (s->mv_type == MV_TYPE_8X8)
2047                             s->dquant = 0;
2048                     }
2049                 }
2050             }
2051         }
2052         ff_set_qscale(s, last_qp + s->dquant);
2053     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2054         ff_set_qscale(s, s->qscale + s->dquant);
2055
2056     wrap_y = s->linesize;
2057     wrap_c = s->uvlinesize;
2058     ptr_y  = s->new_picture.f->data[0] +
2059              (mb_y * 16 * wrap_y)              + mb_x * 16;
2060     ptr_cb = s->new_picture.f->data[1] +
2061              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2062     ptr_cr = s->new_picture.f->data[2] +
2063              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2064
2065     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2066         uint8_t *ebuf = s->edge_emu_buffer + 36 * wrap_y;
2067         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2068         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2069         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2070                                  wrap_y, wrap_y,
2071                                  16, 16, mb_x * 16, mb_y * 16,
2072                                  s->width, s->height);
2073         ptr_y = ebuf;
2074         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2075                                  wrap_c, wrap_c,
2076                                  mb_block_width, mb_block_height,
2077                                  mb_x * mb_block_width, mb_y * mb_block_height,
2078                                  cw, ch);
2079         ptr_cb = ebuf + 16 * wrap_y;
2080         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2081                                  wrap_c, wrap_c,
2082                                  mb_block_width, mb_block_height,
2083                                  mb_x * mb_block_width, mb_y * mb_block_height,
2084                                  cw, ch);
2085         ptr_cr = ebuf + 16 * wrap_y + 16;
2086     }
2087
2088     if (s->mb_intra) {
2089         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2090             int progressive_score, interlaced_score;
2091
2092             s->interlaced_dct = 0;
2093             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2094                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2095                                                      NULL, wrap_y, 8) - 400;
2096
2097             if (progressive_score > 0) {
2098                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2099                                                         NULL, wrap_y * 2, 8) +
2100                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2101                                                         NULL, wrap_y * 2, 8);
2102                 if (progressive_score > interlaced_score) {
2103                     s->interlaced_dct = 1;
2104
2105                     dct_offset = wrap_y;
2106                     uv_dct_offset = wrap_c;
2107                     wrap_y <<= 1;
2108                     if (s->chroma_format == CHROMA_422 ||
2109                         s->chroma_format == CHROMA_444)
2110                         wrap_c <<= 1;
2111                 }
2112             }
2113         }
2114
2115         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2116         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2117         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2118         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2119
2120         if (s->flags & CODEC_FLAG_GRAY) {
2121             skip_dct[4] = 1;
2122             skip_dct[5] = 1;
2123         } else {
2124             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2125             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2126             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2127                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2128                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2129             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2130                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2131                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2132                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2133                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2134                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2135                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2136             }
2137         }
2138     } else {
2139         op_pixels_func (*op_pix)[4];
2140         qpel_mc_func (*op_qpix)[16];
2141         uint8_t *dest_y, *dest_cb, *dest_cr;
2142
2143         dest_y  = s->dest[0];
2144         dest_cb = s->dest[1];
2145         dest_cr = s->dest[2];
2146
2147         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2148             op_pix  = s->hdsp.put_pixels_tab;
2149             op_qpix = s->qdsp.put_qpel_pixels_tab;
2150         } else {
2151             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2152             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2153         }
2154
2155         if (s->mv_dir & MV_DIR_FORWARD) {
2156             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
2157                           s->last_picture.f->data,
2158                           op_pix, op_qpix);
2159             op_pix  = s->hdsp.avg_pixels_tab;
2160             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2161         }
2162         if (s->mv_dir & MV_DIR_BACKWARD) {
2163             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
2164                           s->next_picture.f->data,
2165                           op_pix, op_qpix);
2166         }
2167
2168         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2169             int progressive_score, interlaced_score;
2170
2171             s->interlaced_dct = 0;
2172             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2173                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2174                                                      ptr_y + wrap_y * 8,
2175                                                      wrap_y, 8) - 400;
2176
2177             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2178                 progressive_score -= 400;
2179
2180             if (progressive_score > 0) {
2181                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2182                                                         wrap_y * 2, 8) +
2183                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2184                                                         ptr_y + wrap_y,
2185                                                         wrap_y * 2, 8);
2186
2187                 if (progressive_score > interlaced_score) {
2188                     s->interlaced_dct = 1;
2189
2190                     dct_offset = wrap_y;
2191                     uv_dct_offset = wrap_c;
2192                     wrap_y <<= 1;
2193                     if (s->chroma_format == CHROMA_422)
2194                         wrap_c <<= 1;
2195                 }
2196             }
2197         }
2198
2199         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2200         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2201         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2202                             dest_y + dct_offset, wrap_y);
2203         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2204                             dest_y + dct_offset + 8, wrap_y);
2205
2206         if (s->flags & CODEC_FLAG_GRAY) {
2207             skip_dct[4] = 1;
2208             skip_dct[5] = 1;
2209         } else {
2210             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2211             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2212             if (!s->chroma_y_shift) { /* 422 */
2213                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2214                                     dest_cb + uv_dct_offset, wrap_c);
2215                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2216                                     dest_cr + uv_dct_offset, wrap_c);
2217             }
2218         }
2219         /* pre quantization */
2220         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2221                 2 * s->qscale * s->qscale) {
2222             // FIXME optimize
2223             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2224                 skip_dct[0] = 1;
2225             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2226                 skip_dct[1] = 1;
2227             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2228                                wrap_y, 8) < 20 * s->qscale)
2229                 skip_dct[2] = 1;
2230             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2231                                wrap_y, 8) < 20 * s->qscale)
2232                 skip_dct[3] = 1;
2233             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2234                 skip_dct[4] = 1;
2235             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2236                 skip_dct[5] = 1;
2237             if (!s->chroma_y_shift) { /* 422 */
2238                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2239                                    dest_cb + uv_dct_offset,
2240                                    wrap_c, 8) < 20 * s->qscale)
2241                     skip_dct[6] = 1;
2242                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2243                                    dest_cr + uv_dct_offset,
2244                                    wrap_c, 8) < 20 * s->qscale)
2245                     skip_dct[7] = 1;
2246             }
2247         }
2248     }
2249
2250     if (s->quantizer_noise_shaping) {
2251         if (!skip_dct[0])
2252             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2253         if (!skip_dct[1])
2254             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2255         if (!skip_dct[2])
2256             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2257         if (!skip_dct[3])
2258             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2259         if (!skip_dct[4])
2260             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2261         if (!skip_dct[5])
2262             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2263         if (!s->chroma_y_shift) { /* 422 */
2264             if (!skip_dct[6])
2265                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2266                                   wrap_c);
2267             if (!skip_dct[7])
2268                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2269                                   wrap_c);
2270         }
2271         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2272     }
2273
2274     /* DCT & quantize */
2275     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2276     {
2277         for (i = 0; i < mb_block_count; i++) {
2278             if (!skip_dct[i]) {
2279                 int overflow;
2280                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2281                 // FIXME we could decide to change to quantizer instead of
2282                 // clipping
2283                 // JS: I don't think that would be a good idea it could lower
2284                 //     quality instead of improve it. Just INTRADC clipping
2285                 //     deserves changes in quantizer
2286                 if (overflow)
2287                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2288             } else
2289                 s->block_last_index[i] = -1;
2290         }
2291         if (s->quantizer_noise_shaping) {
2292             for (i = 0; i < mb_block_count; i++) {
2293                 if (!skip_dct[i]) {
2294                     s->block_last_index[i] =
2295                         dct_quantize_refine(s, s->block[i], weight[i],
2296                                             orig[i], i, s->qscale);
2297                 }
2298             }
2299         }
2300
2301         if (s->luma_elim_threshold && !s->mb_intra)
2302             for (i = 0; i < 4; i++)
2303                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2304         if (s->chroma_elim_threshold && !s->mb_intra)
2305             for (i = 4; i < mb_block_count; i++)
2306                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2307
2308         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2309             for (i = 0; i < mb_block_count; i++) {
2310                 if (s->block_last_index[i] == -1)
2311                     s->coded_score[i] = INT_MAX / 256;
2312             }
2313         }
2314     }
2315
2316     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2317         s->block_last_index[4] =
2318         s->block_last_index[5] = 0;
2319         s->block[4][0] =
2320         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2321         if (!s->chroma_y_shift) { /* 422 / 444 */
2322             for (i=6; i<12; i++) {
2323                 s->block_last_index[i] = 0;
2324                 s->block[i][0] = s->block[4][0];
2325             }
2326         }
2327     }
2328
2329     // non c quantize code returns incorrect block_last_index FIXME
2330     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2331         for (i = 0; i < mb_block_count; i++) {
2332             int j;
2333             if (s->block_last_index[i] > 0) {
2334                 for (j = 63; j > 0; j--) {
2335                     if (s->block[i][s->intra_scantable.permutated[j]])
2336                         break;
2337                 }
2338                 s->block_last_index[i] = j;
2339             }
2340         }
2341     }
2342
2343     /* huffman encode */
2344     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2345     case AV_CODEC_ID_MPEG1VIDEO:
2346     case AV_CODEC_ID_MPEG2VIDEO:
2347         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2348             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2349         break;
2350     case AV_CODEC_ID_MPEG4:
2351         if (CONFIG_MPEG4_ENCODER)
2352             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2353         break;
2354     case AV_CODEC_ID_MSMPEG4V2:
2355     case AV_CODEC_ID_MSMPEG4V3:
2356     case AV_CODEC_ID_WMV1:
2357         if (CONFIG_MSMPEG4_ENCODER)
2358             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2359         break;
2360     case AV_CODEC_ID_WMV2:
2361         if (CONFIG_WMV2_ENCODER)
2362             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2363         break;
2364     case AV_CODEC_ID_H261:
2365         if (CONFIG_H261_ENCODER)
2366             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2367         break;
2368     case AV_CODEC_ID_H263:
2369     case AV_CODEC_ID_H263P:
2370     case AV_CODEC_ID_FLV1:
2371     case AV_CODEC_ID_RV10:
2372     case AV_CODEC_ID_RV20:
2373         if (CONFIG_H263_ENCODER)
2374             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2375         break;
2376     case AV_CODEC_ID_MJPEG:
2377     case AV_CODEC_ID_AMV:
2378         if (CONFIG_MJPEG_ENCODER)
2379             ff_mjpeg_encode_mb(s, s->block);
2380         break;
2381     default:
2382         av_assert1(0);
2383     }
2384 }
2385
2386 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2387 {
2388     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2389     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2390     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2391 }
2392
2393 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2394     int i;
2395
2396     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2397
2398     /* mpeg1 */
2399     d->mb_skip_run= s->mb_skip_run;
2400     for(i=0; i<3; i++)
2401         d->last_dc[i] = s->last_dc[i];
2402
2403     /* statistics */
2404     d->mv_bits= s->mv_bits;
2405     d->i_tex_bits= s->i_tex_bits;
2406     d->p_tex_bits= s->p_tex_bits;
2407     d->i_count= s->i_count;
2408     d->f_count= s->f_count;
2409     d->b_count= s->b_count;
2410     d->skip_count= s->skip_count;
2411     d->misc_bits= s->misc_bits;
2412     d->last_bits= 0;
2413
2414     d->mb_skipped= 0;
2415     d->qscale= s->qscale;
2416     d->dquant= s->dquant;
2417
2418     d->esc3_level_length= s->esc3_level_length;
2419 }
2420
2421 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2422     int i;
2423
2424     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2425     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2426
2427     /* mpeg1 */
2428     d->mb_skip_run= s->mb_skip_run;
2429     for(i=0; i<3; i++)
2430         d->last_dc[i] = s->last_dc[i];
2431
2432     /* statistics */
2433     d->mv_bits= s->mv_bits;
2434     d->i_tex_bits= s->i_tex_bits;
2435     d->p_tex_bits= s->p_tex_bits;
2436     d->i_count= s->i_count;
2437     d->f_count= s->f_count;
2438     d->b_count= s->b_count;
2439     d->skip_count= s->skip_count;
2440     d->misc_bits= s->misc_bits;
2441
2442     d->mb_intra= s->mb_intra;
2443     d->mb_skipped= s->mb_skipped;
2444     d->mv_type= s->mv_type;
2445     d->mv_dir= s->mv_dir;
2446     d->pb= s->pb;
2447     if(s->data_partitioning){
2448         d->pb2= s->pb2;
2449         d->tex_pb= s->tex_pb;
2450     }
2451     d->block= s->block;
2452     for(i=0; i<8; i++)
2453         d->block_last_index[i]= s->block_last_index[i];
2454     d->interlaced_dct= s->interlaced_dct;
2455     d->qscale= s->qscale;
2456
2457     d->esc3_level_length= s->esc3_level_length;
2458 }
2459
2460 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2461                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2462                            int *dmin, int *next_block, int motion_x, int motion_y)
2463 {
2464     int score;
2465     uint8_t *dest_backup[3];
2466
2467     copy_context_before_encode(s, backup, type);
2468
2469     s->block= s->blocks[*next_block];
2470     s->pb= pb[*next_block];
2471     if(s->data_partitioning){
2472         s->pb2   = pb2   [*next_block];
2473         s->tex_pb= tex_pb[*next_block];
2474     }
2475
2476     if(*next_block){
2477         memcpy(dest_backup, s->dest, sizeof(s->dest));
2478         s->dest[0] = s->rd_scratchpad;
2479         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2480         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2481         av_assert0(s->linesize >= 32); //FIXME
2482     }
2483
2484     encode_mb(s, motion_x, motion_y);
2485
2486     score= put_bits_count(&s->pb);
2487     if(s->data_partitioning){
2488         score+= put_bits_count(&s->pb2);
2489         score+= put_bits_count(&s->tex_pb);
2490     }
2491
2492     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2493         ff_MPV_decode_mb(s, s->block);
2494
2495         score *= s->lambda2;
2496         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2497     }
2498
2499     if(*next_block){
2500         memcpy(s->dest, dest_backup, sizeof(s->dest));
2501     }
2502
2503     if(score<*dmin){
2504         *dmin= score;
2505         *next_block^=1;
2506
2507         copy_context_after_encode(best, s, type);
2508     }
2509 }
2510
2511 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2512     uint32_t *sq = ff_square_tab + 256;
2513     int acc=0;
2514     int x,y;
2515
2516     if(w==16 && h==16)
2517         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2518     else if(w==8 && h==8)
2519         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2520
2521     for(y=0; y<h; y++){
2522         for(x=0; x<w; x++){
2523             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2524         }
2525     }
2526
2527     av_assert2(acc>=0);
2528
2529     return acc;
2530 }
2531
2532 static int sse_mb(MpegEncContext *s){
2533     int w= 16;
2534     int h= 16;
2535
2536     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2537     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2538
2539     if(w==16 && h==16)
2540       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2541         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2542                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2543                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2544       }else{
2545         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2546                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2547                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2548       }
2549     else
2550         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2551                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2552                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2553 }
2554
2555 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2556     MpegEncContext *s= *(void**)arg;
2557
2558
2559     s->me.pre_pass=1;
2560     s->me.dia_size= s->avctx->pre_dia_size;
2561     s->first_slice_line=1;
2562     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2563         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2564             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2565         }
2566         s->first_slice_line=0;
2567     }
2568
2569     s->me.pre_pass=0;
2570
2571     return 0;
2572 }
2573
2574 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2575     MpegEncContext *s= *(void**)arg;
2576
2577     ff_check_alignment();
2578
2579     s->me.dia_size= s->avctx->dia_size;
2580     s->first_slice_line=1;
2581     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2582         s->mb_x=0; //for block init below
2583         ff_init_block_index(s);
2584         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2585             s->block_index[0]+=2;
2586             s->block_index[1]+=2;
2587             s->block_index[2]+=2;
2588             s->block_index[3]+=2;
2589
2590             /* compute motion vector & mb_type and store in context */
2591             if(s->pict_type==AV_PICTURE_TYPE_B)
2592                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2593             else
2594                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2595         }
2596         s->first_slice_line=0;
2597     }
2598     return 0;
2599 }
2600
2601 static int mb_var_thread(AVCodecContext *c, void *arg){
2602     MpegEncContext *s= *(void**)arg;
2603     int mb_x, mb_y;
2604
2605     ff_check_alignment();
2606
2607     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2608         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2609             int xx = mb_x * 16;
2610             int yy = mb_y * 16;
2611             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2612             int varc;
2613             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2614
2615             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2616                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2617
2618             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2619             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2620             s->me.mb_var_sum_temp    += varc;
2621         }
2622     }
2623     return 0;
2624 }
2625
2626 static void write_slice_end(MpegEncContext *s){
2627     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2628         if(s->partitioned_frame){
2629             ff_mpeg4_merge_partitions(s);
2630         }
2631
2632         ff_mpeg4_stuffing(&s->pb);
2633     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2634         ff_mjpeg_encode_stuffing(s);
2635     }
2636
2637     avpriv_align_put_bits(&s->pb);
2638     flush_put_bits(&s->pb);
2639
2640     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2641         s->misc_bits+= get_bits_diff(s);
2642 }
2643
2644 static void write_mb_info(MpegEncContext *s)
2645 {
2646     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2647     int offset = put_bits_count(&s->pb);
2648     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2649     int gobn = s->mb_y / s->gob_index;
2650     int pred_x, pred_y;
2651     if (CONFIG_H263_ENCODER)
2652         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2653     bytestream_put_le32(&ptr, offset);
2654     bytestream_put_byte(&ptr, s->qscale);
2655     bytestream_put_byte(&ptr, gobn);
2656     bytestream_put_le16(&ptr, mba);
2657     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2658     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2659     /* 4MV not implemented */
2660     bytestream_put_byte(&ptr, 0); /* hmv2 */
2661     bytestream_put_byte(&ptr, 0); /* vmv2 */
2662 }
2663
2664 static void update_mb_info(MpegEncContext *s, int startcode)
2665 {
2666     if (!s->mb_info)
2667         return;
2668     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2669         s->mb_info_size += 12;
2670         s->prev_mb_info = s->last_mb_info;
2671     }
2672     if (startcode) {
2673         s->prev_mb_info = put_bits_count(&s->pb)/8;
2674         /* This might have incremented mb_info_size above, and we return without
2675          * actually writing any info into that slot yet. But in that case,
2676          * this will be called again at the start of the after writing the
2677          * start code, actually writing the mb info. */
2678         return;
2679     }
2680
2681     s->last_mb_info = put_bits_count(&s->pb)/8;
2682     if (!s->mb_info_size)
2683         s->mb_info_size += 12;
2684     write_mb_info(s);
2685 }
2686
2687 static int encode_thread(AVCodecContext *c, void *arg){
2688     MpegEncContext *s= *(void**)arg;
2689     int mb_x, mb_y, pdif = 0;
2690     int chr_h= 16>>s->chroma_y_shift;
2691     int i, j;
2692     MpegEncContext best_s, backup_s;
2693     uint8_t bit_buf[2][MAX_MB_BYTES];
2694     uint8_t bit_buf2[2][MAX_MB_BYTES];
2695     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2696     PutBitContext pb[2], pb2[2], tex_pb[2];
2697
2698     ff_check_alignment();
2699
2700     for(i=0; i<2; i++){
2701         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2702         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2703         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2704     }
2705
2706     s->last_bits= put_bits_count(&s->pb);
2707     s->mv_bits=0;
2708     s->misc_bits=0;
2709     s->i_tex_bits=0;
2710     s->p_tex_bits=0;
2711     s->i_count=0;
2712     s->f_count=0;
2713     s->b_count=0;
2714     s->skip_count=0;
2715
2716     for(i=0; i<3; i++){
2717         /* init last dc values */
2718         /* note: quant matrix value (8) is implied here */
2719         s->last_dc[i] = 128 << s->intra_dc_precision;
2720
2721         s->current_picture.error[i] = 0;
2722     }
2723     if(s->codec_id==AV_CODEC_ID_AMV){
2724         s->last_dc[0] = 128*8/13;
2725         s->last_dc[1] = 128*8/14;
2726         s->last_dc[2] = 128*8/14;
2727     }
2728     s->mb_skip_run = 0;
2729     memset(s->last_mv, 0, sizeof(s->last_mv));
2730
2731     s->last_mv_dir = 0;
2732
2733     switch(s->codec_id){
2734     case AV_CODEC_ID_H263:
2735     case AV_CODEC_ID_H263P:
2736     case AV_CODEC_ID_FLV1:
2737         if (CONFIG_H263_ENCODER)
2738             s->gob_index = ff_h263_get_gob_height(s);
2739         break;
2740     case AV_CODEC_ID_MPEG4:
2741         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2742             ff_mpeg4_init_partitions(s);
2743         break;
2744     }
2745
2746     s->resync_mb_x=0;
2747     s->resync_mb_y=0;
2748     s->first_slice_line = 1;
2749     s->ptr_lastgob = s->pb.buf;
2750     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2751         s->mb_x=0;
2752         s->mb_y= mb_y;
2753
2754         ff_set_qscale(s, s->qscale);
2755         ff_init_block_index(s);
2756
2757         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2758             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2759             int mb_type= s->mb_type[xy];
2760 //            int d;
2761             int dmin= INT_MAX;
2762             int dir;
2763
2764             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2765                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2766                 return -1;
2767             }
2768             if(s->data_partitioning){
2769                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2770                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2771                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2772                     return -1;
2773                 }
2774             }
2775
2776             s->mb_x = mb_x;
2777             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2778             ff_update_block_index(s);
2779
2780             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2781                 ff_h261_reorder_mb_index(s);
2782                 xy= s->mb_y*s->mb_stride + s->mb_x;
2783                 mb_type= s->mb_type[xy];
2784             }
2785
2786             /* write gob / video packet header  */
2787             if(s->rtp_mode){
2788                 int current_packet_size, is_gob_start;
2789
2790                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2791
2792                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2793
2794                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2795
2796                 switch(s->codec_id){
2797                 case AV_CODEC_ID_H263:
2798                 case AV_CODEC_ID_H263P:
2799                     if(!s->h263_slice_structured)
2800                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2801                     break;
2802                 case AV_CODEC_ID_MPEG2VIDEO:
2803                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2804                 case AV_CODEC_ID_MPEG1VIDEO:
2805                     if(s->mb_skip_run) is_gob_start=0;
2806                     break;
2807                 case AV_CODEC_ID_MJPEG:
2808                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2809                     break;
2810                 }
2811
2812                 if(is_gob_start){
2813                     if(s->start_mb_y != mb_y || mb_x!=0){
2814                         write_slice_end(s);
2815
2816                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2817                             ff_mpeg4_init_partitions(s);
2818                         }
2819                     }
2820
2821                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2822                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2823
2824                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2825                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2826                         int d = 100 / s->error_rate;
2827                         if(r % d == 0){
2828                             current_packet_size=0;
2829                             s->pb.buf_ptr= s->ptr_lastgob;
2830                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2831                         }
2832                     }
2833
2834                     if (s->avctx->rtp_callback){
2835                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2836                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2837                     }
2838                     update_mb_info(s, 1);
2839
2840                     switch(s->codec_id){
2841                     case AV_CODEC_ID_MPEG4:
2842                         if (CONFIG_MPEG4_ENCODER) {
2843                             ff_mpeg4_encode_video_packet_header(s);
2844                             ff_mpeg4_clean_buffers(s);
2845                         }
2846                     break;
2847                     case AV_CODEC_ID_MPEG1VIDEO:
2848                     case AV_CODEC_ID_MPEG2VIDEO:
2849                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2850                             ff_mpeg1_encode_slice_header(s);
2851                             ff_mpeg1_clean_buffers(s);
2852                         }
2853                     break;
2854                     case AV_CODEC_ID_H263:
2855                     case AV_CODEC_ID_H263P:
2856                         if (CONFIG_H263_ENCODER)
2857                             ff_h263_encode_gob_header(s, mb_y);
2858                     break;
2859                     }
2860
2861                     if(s->flags&CODEC_FLAG_PASS1){
2862                         int bits= put_bits_count(&s->pb);
2863                         s->misc_bits+= bits - s->last_bits;
2864                         s->last_bits= bits;
2865                     }
2866
2867                     s->ptr_lastgob += current_packet_size;
2868                     s->first_slice_line=1;
2869                     s->resync_mb_x=mb_x;
2870                     s->resync_mb_y=mb_y;
2871                 }
2872             }
2873
2874             if(  (s->resync_mb_x   == s->mb_x)
2875                && s->resync_mb_y+1 == s->mb_y){
2876                 s->first_slice_line=0;
2877             }
2878
2879             s->mb_skipped=0;
2880             s->dquant=0; //only for QP_RD
2881
2882             update_mb_info(s, 0);
2883
2884             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2885                 int next_block=0;
2886                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2887
2888                 copy_context_before_encode(&backup_s, s, -1);
2889                 backup_s.pb= s->pb;
2890                 best_s.data_partitioning= s->data_partitioning;
2891                 best_s.partitioned_frame= s->partitioned_frame;
2892                 if(s->data_partitioning){
2893                     backup_s.pb2= s->pb2;
2894                     backup_s.tex_pb= s->tex_pb;
2895                 }
2896
2897                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2898                     s->mv_dir = MV_DIR_FORWARD;
2899                     s->mv_type = MV_TYPE_16X16;
2900                     s->mb_intra= 0;
2901                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2902                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2903                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2904                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2905                 }
2906                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2907                     s->mv_dir = MV_DIR_FORWARD;
2908                     s->mv_type = MV_TYPE_FIELD;
2909                     s->mb_intra= 0;
2910                     for(i=0; i<2; i++){
2911                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2912                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2913                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2914                     }
2915                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2916                                  &dmin, &next_block, 0, 0);
2917                 }
2918                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2919                     s->mv_dir = MV_DIR_FORWARD;
2920                     s->mv_type = MV_TYPE_16X16;
2921                     s->mb_intra= 0;
2922                     s->mv[0][0][0] = 0;
2923                     s->mv[0][0][1] = 0;
2924                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2925                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2926                 }
2927                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2928                     s->mv_dir = MV_DIR_FORWARD;
2929                     s->mv_type = MV_TYPE_8X8;
2930                     s->mb_intra= 0;
2931                     for(i=0; i<4; i++){
2932                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2933                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2934                     }
2935                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2936                                  &dmin, &next_block, 0, 0);
2937                 }
2938                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2939                     s->mv_dir = MV_DIR_FORWARD;
2940                     s->mv_type = MV_TYPE_16X16;
2941                     s->mb_intra= 0;
2942                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2943                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2944                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2945                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2946                 }
2947                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2948                     s->mv_dir = MV_DIR_BACKWARD;
2949                     s->mv_type = MV_TYPE_16X16;
2950                     s->mb_intra= 0;
2951                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2952                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2953                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2954                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2955                 }
2956                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2957                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2958                     s->mv_type = MV_TYPE_16X16;
2959                     s->mb_intra= 0;
2960                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2961                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2962                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2963                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2964                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2965                                  &dmin, &next_block, 0, 0);
2966                 }
2967                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2968                     s->mv_dir = MV_DIR_FORWARD;
2969                     s->mv_type = MV_TYPE_FIELD;
2970                     s->mb_intra= 0;
2971                     for(i=0; i<2; i++){
2972                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2973                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2974                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2975                     }
2976                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2977                                  &dmin, &next_block, 0, 0);
2978                 }
2979                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2980                     s->mv_dir = MV_DIR_BACKWARD;
2981                     s->mv_type = MV_TYPE_FIELD;
2982                     s->mb_intra= 0;
2983                     for(i=0; i<2; i++){
2984                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2985                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2986                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2987                     }
2988                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2989                                  &dmin, &next_block, 0, 0);
2990                 }
2991                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2992                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2993                     s->mv_type = MV_TYPE_FIELD;
2994                     s->mb_intra= 0;
2995                     for(dir=0; dir<2; dir++){
2996                         for(i=0; i<2; i++){
2997                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2998                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2999                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3000                         }
3001                     }
3002                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3003                                  &dmin, &next_block, 0, 0);
3004                 }
3005                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3006                     s->mv_dir = 0;
3007                     s->mv_type = MV_TYPE_16X16;
3008                     s->mb_intra= 1;
3009                     s->mv[0][0][0] = 0;
3010                     s->mv[0][0][1] = 0;
3011                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3012                                  &dmin, &next_block, 0, 0);
3013                     if(s->h263_pred || s->h263_aic){
3014                         if(best_s.mb_intra)
3015                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3016                         else
3017                             ff_clean_intra_table_entries(s); //old mode?
3018                     }
3019                 }
3020
3021                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3022                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3023                         const int last_qp= backup_s.qscale;
3024                         int qpi, qp, dc[6];
3025                         int16_t ac[6][16];
3026                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3027                         static const int dquant_tab[4]={-1,1,-2,2};
3028                         int storecoefs = s->mb_intra && s->dc_val[0];
3029
3030                         av_assert2(backup_s.dquant == 0);
3031
3032                         //FIXME intra
3033                         s->mv_dir= best_s.mv_dir;
3034                         s->mv_type = MV_TYPE_16X16;
3035                         s->mb_intra= best_s.mb_intra;
3036                         s->mv[0][0][0] = best_s.mv[0][0][0];
3037                         s->mv[0][0][1] = best_s.mv[0][0][1];
3038                         s->mv[1][0][0] = best_s.mv[1][0][0];
3039                         s->mv[1][0][1] = best_s.mv[1][0][1];
3040
3041                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3042                         for(; qpi<4; qpi++){
3043                             int dquant= dquant_tab[qpi];
3044                             qp= last_qp + dquant;
3045                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3046                                 continue;
3047                             backup_s.dquant= dquant;
3048                             if(storecoefs){
3049                                 for(i=0; i<6; i++){
3050                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3051                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3052                                 }
3053                             }
3054
3055                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3056                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3057                             if(best_s.qscale != qp){
3058                                 if(storecoefs){
3059                                     for(i=0; i<6; i++){
3060                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3061                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3062                                     }
3063                                 }
3064                             }
3065                         }
3066                     }
3067                 }
3068                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3069                     int mx= s->b_direct_mv_table[xy][0];
3070                     int my= s->b_direct_mv_table[xy][1];
3071
3072                     backup_s.dquant = 0;
3073                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3074                     s->mb_intra= 0;
3075                     ff_mpeg4_set_direct_mv(s, mx, my);
3076                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3077                                  &dmin, &next_block, mx, my);
3078                 }
3079                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3080                     backup_s.dquant = 0;
3081                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3082                     s->mb_intra= 0;
3083                     ff_mpeg4_set_direct_mv(s, 0, 0);
3084                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3085                                  &dmin, &next_block, 0, 0);
3086                 }
3087                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3088                     int coded=0;
3089                     for(i=0; i<6; i++)
3090                         coded |= s->block_last_index[i];
3091                     if(coded){
3092                         int mx,my;
3093                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3094                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3095                             mx=my=0; //FIXME find the one we actually used
3096                             ff_mpeg4_set_direct_mv(s, mx, my);
3097                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3098                             mx= s->mv[1][0][0];
3099                             my= s->mv[1][0][1];
3100                         }else{
3101                             mx= s->mv[0][0][0];
3102                             my= s->mv[0][0][1];
3103                         }
3104
3105                         s->mv_dir= best_s.mv_dir;
3106                         s->mv_type = best_s.mv_type;
3107                         s->mb_intra= 0;
3108 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3109                         s->mv[0][0][1] = best_s.mv[0][0][1];
3110                         s->mv[1][0][0] = best_s.mv[1][0][0];
3111                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3112                         backup_s.dquant= 0;
3113                         s->skipdct=1;
3114                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3115                                         &dmin, &next_block, mx, my);
3116                         s->skipdct=0;
3117                     }
3118                 }
3119
3120                 s->current_picture.qscale_table[xy] = best_s.qscale;
3121
3122                 copy_context_after_encode(s, &best_s, -1);
3123
3124                 pb_bits_count= put_bits_count(&s->pb);
3125                 flush_put_bits(&s->pb);
3126                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3127                 s->pb= backup_s.pb;
3128
3129                 if(s->data_partitioning){
3130                     pb2_bits_count= put_bits_count(&s->pb2);
3131                     flush_put_bits(&s->pb2);
3132                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3133                     s->pb2= backup_s.pb2;
3134
3135                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3136                     flush_put_bits(&s->tex_pb);
3137                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3138                     s->tex_pb= backup_s.tex_pb;
3139                 }
3140                 s->last_bits= put_bits_count(&s->pb);
3141
3142                 if (CONFIG_H263_ENCODER &&
3143                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3144                     ff_h263_update_motion_val(s);
3145
3146                 if(next_block==0){ //FIXME 16 vs linesize16
3147                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3148                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3149                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3150                 }
3151
3152                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3153                     ff_MPV_decode_mb(s, s->block);
3154             } else {
3155                 int motion_x = 0, motion_y = 0;
3156                 s->mv_type=MV_TYPE_16X16;
3157                 // only one MB-Type possible
3158
3159                 switch(mb_type){
3160                 case CANDIDATE_MB_TYPE_INTRA:
3161                     s->mv_dir = 0;
3162                     s->mb_intra= 1;
3163                     motion_x= s->mv[0][0][0] = 0;
3164                     motion_y= s->mv[0][0][1] = 0;
3165                     break;
3166                 case CANDIDATE_MB_TYPE_INTER:
3167                     s->mv_dir = MV_DIR_FORWARD;
3168                     s->mb_intra= 0;
3169                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3170                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3171                     break;
3172                 case CANDIDATE_MB_TYPE_INTER_I:
3173                     s->mv_dir = MV_DIR_FORWARD;
3174                     s->mv_type = MV_TYPE_FIELD;
3175                     s->mb_intra= 0;
3176                     for(i=0; i<2; i++){
3177                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3178                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3179                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3180                     }
3181                     break;
3182                 case CANDIDATE_MB_TYPE_INTER4V:
3183                     s->mv_dir = MV_DIR_FORWARD;
3184                     s->mv_type = MV_TYPE_8X8;
3185                     s->mb_intra= 0;
3186                     for(i=0; i<4; i++){
3187                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3188                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3189                     }
3190                     break;
3191                 case CANDIDATE_MB_TYPE_DIRECT:
3192                     if (CONFIG_MPEG4_ENCODER) {
3193                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3194                         s->mb_intra= 0;
3195                         motion_x=s->b_direct_mv_table[xy][0];
3196                         motion_y=s->b_direct_mv_table[xy][1];
3197                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3198                     }
3199                     break;
3200                 case CANDIDATE_MB_TYPE_DIRECT0:
3201                     if (CONFIG_MPEG4_ENCODER) {
3202                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3203                         s->mb_intra= 0;
3204                         ff_mpeg4_set_direct_mv(s, 0, 0);
3205                     }
3206                     break;
3207                 case CANDIDATE_MB_TYPE_BIDIR:
3208                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3209                     s->mb_intra= 0;
3210                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3211                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3212                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3213                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3214                     break;
3215                 case CANDIDATE_MB_TYPE_BACKWARD:
3216                     s->mv_dir = MV_DIR_BACKWARD;
3217                     s->mb_intra= 0;
3218                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3219                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3220                     break;
3221                 case CANDIDATE_MB_TYPE_FORWARD:
3222                     s->mv_dir = MV_DIR_FORWARD;
3223                     s->mb_intra= 0;
3224                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3225                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3226                     break;
3227                 case CANDIDATE_MB_TYPE_FORWARD_I:
3228                     s->mv_dir = MV_DIR_FORWARD;
3229                     s->mv_type = MV_TYPE_FIELD;
3230                     s->mb_intra= 0;
3231                     for(i=0; i<2; i++){
3232                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3233                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3234                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3235                     }
3236                     break;
3237                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3238                     s->mv_dir = MV_DIR_BACKWARD;
3239                     s->mv_type = MV_TYPE_FIELD;
3240                     s->mb_intra= 0;
3241                     for(i=0; i<2; i++){
3242                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3243                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3244                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3245                     }
3246                     break;
3247                 case CANDIDATE_MB_TYPE_BIDIR_I:
3248                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3249                     s->mv_type = MV_TYPE_FIELD;
3250                     s->mb_intra= 0;
3251                     for(dir=0; dir<2; dir++){
3252                         for(i=0; i<2; i++){
3253                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3254                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3255                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3256                         }
3257                     }
3258                     break;
3259                 default:
3260                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3261                 }
3262
3263                 encode_mb(s, motion_x, motion_y);
3264
3265                 // RAL: Update last macroblock type
3266                 s->last_mv_dir = s->mv_dir;
3267
3268                 if (CONFIG_H263_ENCODER &&
3269                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3270                     ff_h263_update_motion_val(s);
3271
3272                 ff_MPV_decode_mb(s, s->block);
3273             }
3274
3275             /* clean the MV table in IPS frames for direct mode in B frames */
3276             if(s->mb_intra /* && I,P,S_TYPE */){
3277                 s->p_mv_table[xy][0]=0;
3278                 s->p_mv_table[xy][1]=0;
3279             }
3280
3281             if(s->flags&CODEC_FLAG_PSNR){
3282                 int w= 16;
3283                 int h= 16;
3284
3285                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3286                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3287
3288                 s->current_picture.error[0] += sse(
3289                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3290                     s->dest[0], w, h, s->linesize);
3291                 s->current_picture.error[1] += sse(
3292                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3293                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3294                 s->current_picture.error[2] += sse(
3295                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3296                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3297             }
3298             if(s->loop_filter){
3299                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3300                     ff_h263_loop_filter(s);
3301             }
3302             av_dlog(s->avctx, "MB %d %d bits\n",
3303                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3304         }
3305     }
3306
3307     //not beautiful here but we must write it before flushing so it has to be here
3308     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3309         ff_msmpeg4_encode_ext_header(s);
3310
3311     write_slice_end(s);
3312
3313     /* Send the last GOB if RTP */
3314     if (s->avctx->rtp_callback) {
3315         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3316         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3317         /* Call the RTP callback to send the last GOB */
3318         emms_c();
3319         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3320     }
3321
3322     return 0;
3323 }
3324
3325 #define MERGE(field) dst->field += src->field; src->field=0
3326 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3327     MERGE(me.scene_change_score);
3328     MERGE(me.mc_mb_var_sum_temp);
3329     MERGE(me.mb_var_sum_temp);
3330 }
3331
3332 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3333     int i;
3334
3335     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3336     MERGE(dct_count[1]);
3337     MERGE(mv_bits);
3338     MERGE(i_tex_bits);
3339     MERGE(p_tex_bits);
3340     MERGE(i_count);
3341     MERGE(f_count);
3342     MERGE(b_count);
3343     MERGE(skip_count);
3344     MERGE(misc_bits);
3345     MERGE(er.error_count);
3346     MERGE(padding_bug_score);
3347     MERGE(current_picture.error[0]);
3348     MERGE(current_picture.error[1]);
3349     MERGE(current_picture.error[2]);
3350
3351     if(dst->avctx->noise_reduction){
3352         for(i=0; i<64; i++){
3353             MERGE(dct_error_sum[0][i]);
3354             MERGE(dct_error_sum[1][i]);
3355         }
3356     }
3357
3358     assert(put_bits_count(&src->pb) % 8 ==0);
3359     assert(put_bits_count(&dst->pb) % 8 ==0);
3360     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3361     flush_put_bits(&dst->pb);
3362 }
3363
3364 static int estimate_qp(MpegEncContext *s, int dry_run){
3365     if (s->next_lambda){
3366         s->current_picture_ptr->f->quality =
3367         s->current_picture.f->quality = s->next_lambda;
3368         if(!dry_run) s->next_lambda= 0;
3369     } else if (!s->fixed_qscale) {
3370         s->current_picture_ptr->f->quality =
3371         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3372         if (s->current_picture.f->quality < 0)
3373             return -1;
3374     }
3375
3376     if(s->adaptive_quant){
3377         switch(s->codec_id){
3378         case AV_CODEC_ID_MPEG4:
3379             if (CONFIG_MPEG4_ENCODER)
3380                 ff_clean_mpeg4_qscales(s);
3381             break;
3382         case AV_CODEC_ID_H263:
3383         case AV_CODEC_ID_H263P:
3384         case AV_CODEC_ID_FLV1:
3385             if (CONFIG_H263_ENCODER)
3386                 ff_clean_h263_qscales(s);
3387             break;
3388         default:
3389             ff_init_qscale_tab(s);
3390         }
3391
3392         s->lambda= s->lambda_table[0];
3393         //FIXME broken
3394     }else
3395         s->lambda = s->current_picture.f->quality;
3396     update_qscale(s);
3397     return 0;
3398 }
3399
3400 /* must be called before writing the header */
3401 static void set_frame_distances(MpegEncContext * s){
3402     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3403     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3404
3405     if(s->pict_type==AV_PICTURE_TYPE_B){
3406         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3407         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3408     }else{
3409         s->pp_time= s->time - s->last_non_b_time;
3410         s->last_non_b_time= s->time;
3411         assert(s->picture_number==0 || s->pp_time > 0);
3412     }
3413 }
3414
/**
 * Encode one picture.
 *
 * In order: set up timing and rounding state, pick a preliminary lambda,
 * synchronize the slice thread contexts, run motion estimation (or mark
 * every macroblock intra for I-pictures), choose f_code/b_code, select the
 * final quantizer via estimate_qp(), write the format specific picture
 * header and finally run encode_thread() on all slice contexts and merge
 * their results back into s.
 *
 * @param s              main encoder context
 * @param picture_number stored in s->picture_number and passed on to the
 *                       picture header writers
 * @return 0 on success; -1 if estimate_qp() or ff_init_me() fails, or the
 *         negative value returned by ff_update_duplicate_context()
 */
3415 static int encode_picture(MpegEncContext *s, int picture_number)
3416 {
3417     int i, ret;
3418     int bits;
3419     int context_count = s->slice_context_count;
3420
3421     s->picture_number = picture_number;
3422
3423     /* Reset the average MB variance */
3424     s->me.mb_var_sum_temp    =
3425     s->me.mc_mb_var_sum_temp = 0;
3426
3427     /* we need to initialize some time vars before we can encode b-frames */
3428     // RAL: Condition added for MPEG1VIDEO
3429     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3430         set_frame_distances(s);
3431     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3432         ff_set_mpeg4_time(s);
3433
3434     s->me.scene_change_score=0;
3435
3436 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3437
         /* I-pictures set no_rounding to a fixed value (1 for msmpeg4_version >= 3,
          * else 0); other non-B pictures toggle it when flipflop_rounding is set
          * or the codec is H263P / MPEG4. B-pictures leave it unchanged. */
3438     if(s->pict_type==AV_PICTURE_TYPE_I){
3439         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3440         else                        s->no_rounding=0;
3441     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3442         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3443             s->no_rounding ^= 1;
3444     }
3445
         /* Preliminary lambda for motion estimation: with PASS2 a dry-run
          * estimate_qp() is used; otherwise, unless CODEC_FLAG_QSCALE is set,
          * lambda is taken from the last lambda stored for the picture type
          * (B) or for the last non-B picture type. */
3446     if(s->flags & CODEC_FLAG_PASS2){
3447         if (estimate_qp(s,1) < 0)
3448             return -1;
3449         ff_get_2pass_fcode(s);
3450     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3451         if(s->pict_type==AV_PICTURE_TYPE_B)
3452             s->lambda= s->last_lambda_for[s->pict_type];
3453         else
3454             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3455         update_qscale(s);
3456     }
3457
         /* For every codec except AMV and MJPEG the chroma intra quantizer
          * tables alias the luma ones; tables that are not already aliases
          * are freed before the pointers are overwritten. */
3458     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3459         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3460         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3461         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3462         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3463     }
3464
3465     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* bring the additional slice contexts (index 1 and up) in sync with s */
3466     for(i=1; i<context_count; i++){
3467         ret = ff_update_duplicate_context(s->thread_context[i], s);
3468         if (ret < 0)
3469             return ret;
3470     }
3471
3472     if(ff_init_me(s)<0)
3473         return -1;
3474
3475     /* Estimate motion for every MB */
3476     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation / 256;
              * the +128 rounds the result before the shift */
3477         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3478         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
             /* the pre-estimation pass is only run for non-B pictures, when
              * pre_me is 2 or when pre_me is set and the last non-B picture
              * was an I-picture */
3479         if (s->pict_type != AV_PICTURE_TYPE_B) {
3480             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3481                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3482             }
3483         }
3484
3485         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3486     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3487         /* I-Frame */
3488         for(i=0; i<s->mb_stride*s->mb_height; i++)
3489             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3490
3491         if(!s->fixed_qscale){
3492             /* finding spatial complexity for I-frame rate control */
3493             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3494         }
3495     }
         /* collect the motion estimation statistics of the other slice contexts */
3496     for(i=1; i<context_count; i++){
3497         merge_context_after_me(s, s->thread_context[i]);
3498     }
3499     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3500     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3501     emms_c();
3502
         /* a P-picture whose scene change score exceeds the threshold is
          * re-typed as an I-picture with all macroblocks marked intra */
3503     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3504         s->pict_type= AV_PICTURE_TYPE_I;
3505         for(i=0; i<s->mb_stride*s->mb_height; i++)
3506             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3507         if(s->msmpeg4_version >= 3)
3508             s->no_rounding=1;
3509         av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3510                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3511     }
3512
         /* f_code / b_code selection and fixing of too long motion vectors;
          * skipped entirely when umvplus is set */
3513     if(!s->umvplus){
3514         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3515             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3516
3517             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3518                 int a,b;
3519                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3520                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3521                 s->f_code= FFMAX3(s->f_code, a, b);
3522             }
3523
3524             ff_fix_long_p_mvs(s);
3525             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3526             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3527                 int j;
3528                 for(i=0; i<2; i++){
3529                     for(j=0; j<2; j++)
3530                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3531                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3532                 }
3533             }
3534         }
3535
3536         if(s->pict_type==AV_PICTURE_TYPE_B){
3537             int a, b;
3538
                 /* f_code covers the forward tables, b_code the backward ones;
                  * each is the maximum over the plain and the bidir table */
3539             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3540             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3541             s->f_code = FFMAX(a, b);
3542
3543             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3544             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3545             s->b_code = FFMAX(a, b);
3546
3547             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3548             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3549             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3550             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3551             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3552                 int dir, j;
3553                 for(dir=0; dir<2; dir++){
3554                     for(i=0; i<2; i++){
3555                         for(j=0; j<2; j++){
3556                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3557                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3558                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3559                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3560                         }
3561                     }
3562                 }
3563             }
3564         }
3565     }
3566
         /* final (non dry-run) quantizer selection for this picture */
3567     if (estimate_qp(s, 0) < 0)
3568         return -1;
3569
3570     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3571         s->qscale= 3; //reduce clipping problems
3572
         /* MJPEG: build the luma and chroma intra matrices from the default
          * MPEG-1 matrix or the user supplied ones, with qscale folded in,
          * then continue with a fixed qscale of 8 */
3573     if (s->out_format == FMT_MJPEG) {
3574         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3575         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3576
3577         if (s->avctx->intra_matrix) {
3578             chroma_matrix =
3579             luma_matrix = s->avctx->intra_matrix;
3580         }
3581         if (s->avctx->chroma_intra_matrix)
3582             chroma_matrix = s->avctx->chroma_intra_matrix;
3583
3584         /* for mjpeg, we do include qscale in the matrix */
3585         for(i=1;i<64;i++){
3586             int j = s->idsp.idct_permutation[i];
3587
3588             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3589             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3590         }
3591         s->y_dc_scale_table=
3592         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
             /* entry 0 of both matrices is taken from the DC scale table
              * instead of being scaled by qscale */
3593         s->chroma_intra_matrix[0] =
3594         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3595         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3596                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3597         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3598                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3599         s->qscale= 8;
3600     }
         /* AMV: matrices come from fixed rows of sp5x_quant_table and the DC
          * scale tables are constant (13 for luma, 14 for chroma); qscale is
          * again forced to 8 */
3601     if(s->codec_id == AV_CODEC_ID_AMV){
3602         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3603         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3604         for(i=1;i<64;i++){
3605             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3606
3607             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3608             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3609         }
3610         s->y_dc_scale_table= y;
3611         s->c_dc_scale_table= c;
3612         s->intra_matrix[0] = 13;
3613         s->chroma_intra_matrix[0] = 14;
3614         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3615                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3616         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3617                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3618         s->qscale= 8;
3619     }
3620
3621     //FIXME var duplication
         /* publish the final picture type; I-pictures are flagged as key frames */
3622     s->current_picture_ptr->f->key_frame =
3623     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3624     s->current_picture_ptr->f->pict_type =
3625     s->current_picture.f->pict_type = s->pict_type;
3626
3627     if (s->current_picture.f->key_frame)
3628         s->picture_in_gop_number=0;
3629
3630     s->mb_x = s->mb_y = 0;
         /* remember the bit position so the header size can be computed below */
3631     s->last_bits= put_bits_count(&s->pb);
         /* write the picture header matching the output format / codec; an
          * unknown out_format trips av_assert0 */
3632     switch(s->out_format) {
3633     case FMT_MJPEG:
3634         if (CONFIG_MJPEG_ENCODER)
3635             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3636                                            s->intra_matrix, s->chroma_intra_matrix);
3637         break;
3638     case FMT_H261:
3639         if (CONFIG_H261_ENCODER)
3640             ff_h261_encode_picture_header(s, picture_number);
3641         break;
3642     case FMT_H263:
3643         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3644             ff_wmv2_encode_picture_header(s, picture_number);
3645         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3646             ff_msmpeg4_encode_picture_header(s, picture_number);
3647         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3648             ff_mpeg4_encode_picture_header(s, picture_number);
3649         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3650             ff_rv10_encode_picture_header(s, picture_number);
3651         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3652             ff_rv20_encode_picture_header(s, picture_number);
3653         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3654             ff_flv_encode_picture_header(s, picture_number);
3655         else if (CONFIG_H263_ENCODER)
3656             ff_h263_encode_picture_header(s, picture_number);
3657         break;
3658     case FMT_MPEG1:
3659         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3660             ff_mpeg1_encode_picture_header(s, picture_number);
3661         break;
3662     default:
3663         av_assert0(0);
3664     }
         /* header_bits = number of bits the picture header just added to s->pb */
3665     bits= put_bits_count(&s->pb);
3666     s->header_bits= bits - s->last_bits;
3667
         /* propagate the post-ME state to the other slice contexts, encode
          * all slices, then merge statistics and bitstreams back into s.
          * NOTE(review): the return value of execute() is not checked here. */
3668     for(i=1; i<context_count; i++){
3669         update_duplicate_context_after_me(s->thread_context[i], s);
3670     }
3671     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3672     for(i=1; i<context_count; i++){
3673         merge_context_after_encode(s, s->thread_context[i]);
3674     }
3675     emms_c();
3676     return 0;
3677 }
3678
3679 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3680     const int intra= s->mb_intra;
3681     int i;
3682
3683     s->dct_count[intra]++;
3684
3685     for(i=0; i<64; i++){
3686         int level= block[i];
3687
3688         if(level){
3689             if(level>0){
3690                 s->dct_error_sum[intra][i] += level;
3691                 level -= s->dct_offset[intra][i];
3692                 if(level<0) level=0;
3693             }else{
3694                 s->dct_error_sum[intra][i] -= level;
3695                 level += s->dct_offset[intra][i];
3696                 if(level>0) level=0;
3697             }
3698             block[i]= level;
3699         }
3700     }
3701 }
3702
/**
 * Forward-DCT a block and quantize it with a rate-distortion (trellis) search.
 *
 * For every scan position up to the last coefficient that survives plain
 * thresholding, up to two candidate levels are collected. A dynamic-programming
 * pass over (run, level) pairs then selects the combination with the lowest
 * "distortion + lambda * VLC length" score. The chosen score is also stored
 * in s->coded_score[n].
 *
 * @param s        encoder context (DCT function, quant matrices, VLC length
 *                 tables, lambda2)
 * @param block    64 coefficients; transformed in place and overwritten with
 *                 the selected levels
 * @param n        block index; n < 4 selects y_dc_scale and q_intra_matrix,
 *                 otherwise c_dc_scale and q_chroma_intra_matrix
 * @param qscale   quantizer scale, used to index the quantization matrices
 * @param overflow set to nonzero when a candidate level may exceed
 *                 s->max_qcoeff
 * @return scan index of the last nonzero coefficient; a value below the first
 *         coded index (0 for intra, -1 for inter) means no AC data is coded
 */
3703 static int dct_quantize_trellis_c(MpegEncContext *s,
3704                                   int16_t *block, int n,
3705                                   int qscale, int *overflow){
3706     const int *qmat;
3707     const uint8_t *scantable= s->intra_scantable.scantable;
3708     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3709     int max=0;
3710     unsigned int threshold1, threshold2;
3711     int bias=0;
3712     int run_tab[65];
3713     int level_tab[65];
3714     int score_tab[65];
3715     int survivor[65];
3716     int survivor_count;
3717     int last_run=0;
3718     int last_level=0;
3719     int last_score= 0;
3720     int last_i;
3721     int coeff[2][64];
3722     int coeff_count[64];
3723     int qmul, qadd, start_i, last_non_zero, i, dc;
3724     const int esc_length= s->ac_esc_length;
3725     uint8_t * length;
3726     uint8_t * last_length;
3727     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3728
         /* Transform in place, then run the optional DCT-domain denoiser. */
3729     s->fdsp.fdct(block);
3730
3731     if(s->dct_error_sum)
3732         s->denoise_dct(s, block);
         /* Multiplier and offset used below to reconstruct a level for the
          * FMT_H263 / FMT_H261 distortion estimate. */
3733     qmul= qscale*16;
3734     qadd= ((qscale-1)|1)*8;
3735
3736     if (s->mb_intra) {
3737         int q;
3738         if (!s->h263_aic) {
3739             if (n < 4)
3740                 q = s->y_dc_scale;
3741             else
3742                 q = s->c_dc_scale;
3743             q = q << 3;
3744         } else{
3745             /* For AIC we skip quant/dequant of INTRADC */
3746             q = 1 << 3;
3747             qadd=0;
3748         }
3749
3750         /* note: block[0] is assumed to be positive */
3751         block[0] = (block[0] + (q >> 1)) / q;
             /* The intra DC is quantized separately above, so the search
              * starts at scan position 1. */
3752         start_i = 1;
3753         last_non_zero = 0;
3754         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3755         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3756             bias= 1<<(QMAT_SHIFT-1);
3757         length     = s->intra_ac_vlc_length;
3758         last_length= s->intra_ac_vlc_last_length;
3759     } else {
3760         start_i = 0;
3761         last_non_zero = -1;
3762         qmat = s->q_inter_matrix[qscale];
3763         length     = s->inter_ac_vlc_length;
3764         last_length= s->inter_ac_vlc_last_length;
3765     }
3766     last_i= start_i;
3767
         /* (unsigned)(level + threshold1) > threshold2 is a single-compare
          * test for |level| > threshold1. */
3768     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3769     threshold2= (threshold1<<1);
3770
         /* Scan backwards for the last coefficient outside the dead zone. */
3771     for(i=63; i>=start_i; i--) {
3772         const int j = scantable[i];
3773         int level = block[j] * qmat[j];
3774
3775         if(((unsigned)(level+threshold1))>threshold2){
3776             last_non_zero = i;
3777             break;
3778         }
3779     }
3780
         /* Collect the candidate levels per scan position: the rounded level
          * and, when its magnitude is at least 2, the next smaller magnitude.
          * Coefficients inside the dead zone get a single +-1 candidate. */
3781     for(i=start_i; i<=last_non_zero; i++) {
3782         const int j = scantable[i];
3783         int level = block[j] * qmat[j];
3784
3785 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3786 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3787         if(((unsigned)(level+threshold1))>threshold2){
3788             if(level>0){
3789                 level= (bias + level)>>QMAT_SHIFT;
3790                 coeff[0][i]= level;
3791                 coeff[1][i]= level-1;
3792 //                coeff[2][k]= level-2;
3793             }else{
3794                 level= (bias - level)>>QMAT_SHIFT;
3795                 coeff[0][i]= -level;
3796                 coeff[1][i]= -level+1;
3797 //                coeff[2][k]= -level+2;
3798             }
3799             coeff_count[i]= FFMIN(level, 2);
3800             av_assert2(coeff_count[i]);
3801             max |=level;
3802         }else{
3803             coeff[0][i]= (level>>31)|1;
3804             coeff_count[i]= 1;
3805         }
3806     }
3807
3808     *overflow= s->max_qcoeff < max; //overflow might have happened
3809
         /* Nothing survived thresholding: clear the coefficients and stop. */
3810     if(last_non_zero < start_i){
3811         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3812         return last_non_zero;
3813     }
3814
         /* Dynamic programming. score_tab[k] receives the best score found
          * for a path whose latest coded coefficient is at position k-1;
          * survivor[] lists the path end points still worth extending. */
3815     score_tab[start_i]= 0;
3816     survivor[0]= start_i;
3817     survivor_count= 1;
3818
3819     for(i=start_i; i<=last_non_zero; i++){
3820         int level_index, j, zero_distortion;
3821         int dct_coeff= FFABS(block[ scantable[i] ]);
3822         int best_score=256*256*256*120;
3823
3824         if (s->fdsp.fdct == ff_fdct_ifast)
3825             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3826         zero_distortion= dct_coeff*dct_coeff;
3827
3828         for(level_index=0; level_index < coeff_count[i]; level_index++){
3829             int distortion;
3830             int level= coeff[level_index][i];
3831             const int alevel= FFABS(level);
3832             int unquant_coeff;
3833
3834             av_assert2(level);
3835
                 /* Reconstruct the value a decoder would produce for this
                  * candidate level. */
3836             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3837                 unquant_coeff= alevel*qmul + qadd;
3838             }else{ //MPEG1
3839                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3840                 if(s->mb_intra){
3841                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3842                         unquant_coeff =   (unquant_coeff - 1) | 1;
3843                 }else{
3844                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3845                         unquant_coeff =   (unquant_coeff - 1) | 1;
3846                 }
3847                 unquant_coeff<<= 3;
3848             }
3849
                 /* Distortion is measured relative to coding a zero here. */
3850             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* Offset the level by 64: values 0..127 index the VLC length
                  * tables, anything else is charged the escape length. */
3851             level+=64;
3852             if((level&(~127)) == 0){
3853                 for(j=survivor_count-1; j>=0; j--){
3854                     int run= i - survivor[j];
3855                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3856                     score += score_tab[i-run];
3857
3858                     if(score < best_score){
3859                         best_score= score;
3860                         run_tab[i+1]= run;
3861                         level_tab[i+1]= level-64;
3862                     }
3863                 }
3864
                     /* H.263 / H.261 use a separate length table for the last
                      * coefficient, so track the best "ends here" path too. */
3865                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3866                     for(j=survivor_count-1; j>=0; j--){
3867                         int run= i - survivor[j];
3868                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3869                         score += score_tab[i-run];
3870                         if(score < last_score){
3871                             last_score= score;
3872                             last_run= run;
3873                             last_level= level-64;
3874                             last_i= i+1;
3875                         }
3876                     }
3877                 }
3878             }else{
3879                 distortion += esc_length*lambda;
3880                 for(j=survivor_count-1; j>=0; j--){
3881                     int run= i - survivor[j];
3882                     int score= distortion + score_tab[i-run];
3883
3884                     if(score < best_score){
3885                         best_score= score;
3886                         run_tab[i+1]= run;
3887                         level_tab[i+1]= level-64;
3888                     }
3889                 }
3890
3891                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3892                   for(j=survivor_count-1; j>=0; j--){
3893                         int run= i - survivor[j];
3894                         int score= distortion + score_tab[i-run];
3895                         if(score < last_score){
3896                             last_score= score;
3897                             last_run= run;
3898                             last_level= level-64;
3899                             last_i= i+1;
3900                         }
3901                     }
3902                 }
3903             }
3904         }
3905
3906         score_tab[i+1]= best_score;
3907
3908         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
             /* Drop survivors that score worse than the new end point; for
              * longer blocks a slack of lambda is allowed. */
3909         if(last_non_zero <= 27){
3910             for(; survivor_count; survivor_count--){
3911                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3912                     break;
3913             }
3914         }else{
3915             for(; survivor_count; survivor_count--){
3916                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3917                     break;
3918             }
3919         }
3920
3921         survivor[ survivor_count++ ]= i+1;
3922     }
3923
         /* For the other formats the end position is chosen here from the
          * accumulated scores, charging 2*lambda for every end position
          * other than 0. */
3924     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3925         last_score= 256*256*256*120;
3926         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3927             int score= score_tab[i];
3928             if(i) score += lambda*2; //FIXME exacter?
3929
3930             if(score < last_score){
3931                 last_score= score;
3932                 last_i= i;
3933                 last_level= level_tab[i];
3934                 last_run= run_tab[i];
3935             }
3936         }
3937     }
3938
3939     s->coded_score[n] = last_score;
3940
         /* Keep |block[0]| before the coefficients are cleared; it is only
          * used by the single-coefficient case below. */
3941     dc= FFABS(block[0]);
3942     last_non_zero= last_i - 1;
3943     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3944
3945     if(last_non_zero < start_i)
3946         return last_non_zero;
3947
         /* Non-intra block whose only coded coefficient sits at position 0:
          * decide again between each candidate level and coding nothing. */
3948     if(last_non_zero == 0 && start_i == 0){
3949         int best_level= 0;
3950         int best_score= dc * dc;
3951
3952         for(i=0; i<coeff_count[0]; i++){
3953             int level= coeff[i][0];
3954             int alevel= FFABS(level);
3955             int unquant_coeff, score, distortion;
3956
3957             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3958                     unquant_coeff= (alevel*qmul + qadd)>>3;
3959             }else{ //MPEG1
3960                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3961                     unquant_coeff =   (unquant_coeff - 1) | 1;
3962             }
3963             unquant_coeff = (unquant_coeff + 4) >> 3;
3964             unquant_coeff<<= 3 + 3;
3965
3966             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3967             level+=64;
3968             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3969             else                    score= distortion + esc_length*lambda;
3970
3971             if(score < best_score){
3972                 best_score= score;
3973                 best_level= level - 64;
3974             }
3975         }
3976         block[0]= best_level;
3977         s->coded_score[n] = best_score - dc*dc;
3978         if(best_level == 0) return -1;
3979         else                return last_non_zero;
3980     }
3981
         /* Walk the selected path backwards through run_tab[] / level_tab[]
          * and store the levels at their perm_scantable positions. */
3982     i= last_i;
3983     av_assert2(last_level);
3984
3985     block[ perm_scantable[last_non_zero] ]= last_level;
3986     i -= last_run + 1;
3987
3988     for(; i>start_i; i -= run_tab[i] + 1){
3989         block[ perm_scantable[i-1] ]= level_tab[i];
3990     }
3991
3992     return last_non_zero;
3993 }
3994
3995 //#define REFINE_STATS 1
3996 static int16_t basis[64][64];
3997
3998 static void build_basis(uint8_t *perm){
3999     int i, j, x, y;
4000     emms_c();
4001     for(i=0; i<8; i++){
4002         for(j=0; j<8; j++){
4003             for(y=0; y<8; y++){
4004                 for(x=0; x<8; x++){
4005                     double s= 0.25*(1<<BASIS_SHIFT);
4006                     int index= 8*i + j;
4007                     int perm_index= perm[index];
4008                     if(i==0) s*= sqrt(0.5);
4009                     if(j==0) s*= sqrt(0.5);
4010                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4011                 }
4012             }
4013         }
4014     }
4015 }
4016
/**
 * Iteratively refine already quantized coefficients (used together with
 * s->quantizer_noise_shaping).
 *
 * Starting from the levels in block, each pass evaluates changing one
 * coefficient by +-1. A candidate is scored as the value returned by
 * s->mpvencdsp.try_8x8basis() for the resulting residual plus lambda times
 * the change in VLC length. The best change is applied, and the search stops
 * once no candidate beats the current score.
 *
 * @param s      encoder context
 * @param block  quantized levels, addressed through
 *               s->intra_scantable.permutated; updated in place
 * @param weight 64 weights; overwritten with derived weights in 16..63
 * @param orig   64 reference values the reconstruction is compared against
 *               (presumably the source samples - confirm against the caller)
 * @param n      block index; n < 4 selects y_dc_scale, otherwise c_dc_scale
 * @param qscale quantizer scale
 * @return scan index of the last nonzero coefficient after refinement
 */
4017 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4018                         int16_t *block, int16_t *weight, int16_t *orig,
4019                         int n, int qscale){
4020     int16_t rem[64];
4021     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4022     const uint8_t *scantable= s->intra_scantable.scantable;
4023     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4024 //    unsigned int threshold1, threshold2;
4025 //    int bias=0;
4026     int run_tab[65];
4027     int prev_run=0;
4028     int prev_level=0;
4029     int qmul, qadd, start_i, last_non_zero, i, dc;
4030     uint8_t * length;
4031     uint8_t * last_length;
4032     int lambda;
4033     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4034 #ifdef REFINE_STATS
4035 static int count=0;
4036 static int after_last=0;
4037 static int to_zero=0;
4038 static int from_zero=0;
4039 static int raise=0;
4040 static int lower=0;
4041 static int messed_sign=0;
4042 #endif
4043
         /* Build the basis table on first use; basis[0][0] is zero until then.
          * NOTE(review): this lazy initialisation is not synchronised -
          * confirm that concurrent first calls cannot happen. */
4044     if(basis[0][0] == 0)
4045         build_basis(s->idsp.idct_permutation);
4046
         /* Dequantisation multiplier and offset for a nonzero level. */
4047     qmul= qscale*2;
4048     qadd= (qscale-1)|1;
4049     if (s->mb_intra) {
4050         if (!s->h263_aic) {
4051             if (n < 4)
4052                 q = s->y_dc_scale;
4053             else
4054                 q = s->c_dc_scale;
4055         } else{
4056             /* For AIC we skip quant/dequant of INTRADC */
4057             q = 1;
4058             qadd=0;
4059         }
4060         q <<= RECON_SHIFT-3;
4061         /* note: block[0] is assumed to be positive */
4062         dc= block[0]*q;
4063 //        block[0] = (block[0] + (q >> 1)) / q;
4064         start_i = 1;
4065 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4066 //            bias= 1<<(QMAT_SHIFT-1);
4067         length     = s->intra_ac_vlc_length;
4068         last_length= s->intra_ac_vlc_last_length;
4069     } else {
4070         dc= 0;
4071         start_i = 0;
4072         length     = s->inter_ac_vlc_length;
4073         last_length= s->inter_ac_vlc_last_length;
4074     }
4075     last_non_zero = s->block_last_index[n];
4076
4077 #ifdef REFINE_STATS
4078 {START_TIMER
4079 #endif
         /* rem[] starts as DC plus a rounding term minus the reference,
          * scaled by 1 << RECON_SHIFT; the contributions of the coded
          * coefficients are added further down. */
4080     dc += (1<<(RECON_SHIFT-1));
4081     for(i=0; i<64; i++){
4082         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig directly instead of copying to rem[]
4083     }
4084 #ifdef REFINE_STATS
4085 STOP_TIMER("memset rem[]")}
4086 #endif
         /* Replace each incoming weight by a value in 16..63 and accumulate
          * the squares, which scale lambda below. */
4087     sum=0;
4088     for(i=0; i<64; i++){
4089         int one= 36;
4090         int qns=4;
4091         int w;
4092
4093         w= FFABS(weight[i]) + qns*one;
4094         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4095
4096         weight[i] = w;
4097 //        w=weight[i] = (63*qns + (w/2)) / w;
4098
4099         av_assert2(w>0);
4100         av_assert2(w<(1<<6));
4101         sum += w*w;
4102     }
4103     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4104 #ifdef REFINE_STATS
4105 {START_TIMER
4106 #endif
         /* Add the dequantised basis function of every nonzero coefficient to
          * rem[] and record the zero run preceding each one in run_tab[]. */
4107     run=0;
4108     rle_index=0;
4109     for(i=start_i; i<=last_non_zero; i++){
4110         int j= perm_scantable[i];
4111         const int level= block[j];
4112         int coeff;
4113
4114         if(level){
4115             if(level<0) coeff= qmul*level - qadd;
4116             else        coeff= qmul*level + qadd;
4117             run_tab[rle_index++]=run;
4118             run=0;
4119
4120             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4121         }else{
4122             run++;
4123         }
4124     }
4125 #ifdef REFINE_STATS
4126 if(last_non_zero>0){
4127 STOP_TIMER("init rem[]")
4128 }
4129 }
4130
4131 {START_TIMER
4132 #endif
         /* Greedy search: every iteration scores all single +-1 changes and
          * applies the best one; the loop ends when none beats the score of
          * leaving the block unchanged. */
4133     for(;;){
4134         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4135         int best_coeff=0;
4136         int best_change=0;
4137         int run2, best_unquant_change=0, analyze_gradient;
4138 #ifdef REFINE_STATS
4139 {START_TIMER
4140 #endif
4141         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4142
             /* d1[] = DCT of the weighted residual; its signs are used below
              * to reject zero -> +-1 changes that have the same sign as d1. */
4143         if(analyze_gradient){
4144 #ifdef REFINE_STATS
4145 {START_TIMER
4146 #endif
4147             for(i=0; i<64; i++){
4148                 int w= weight[i];
4149
4150                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4151             }
4152 #ifdef REFINE_STATS
4153 STOP_TIMER("rem*w*w")}
4154 {START_TIMER
4155 #endif
4156             s->fdsp.fdct(d1);
4157 #ifdef REFINE_STATS
4158 STOP_TIMER("dct")}
4159 #endif
4160         }
4161
             /* Intra DC: try block[0] +- 1; no rate term is added here. */
4162         if(start_i){
4163             const int level= block[0];
4164             int change, old_coeff;
4165
4166             av_assert2(s->mb_intra);
4167
4168             old_coeff= q*level;
4169
4170             for(change=-1; change<=1; change+=2){
4171                 int new_level= level + change;
4172                 int score, new_coeff;
4173
4174                 new_coeff= q*new_level;
4175                 if(new_coeff >= 2048 || new_coeff < 0)
4176                     continue;
4177
4178                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4179                                                   new_coeff - old_coeff);
4180                 if(score<best_score){
4181                     best_score= score;
4182                     best_coeff= 0;
4183                     best_change= change;
4184                     best_unquant_change= new_coeff - old_coeff;
4185                 }
4186             }
4187         }
4188
             /* AC coefficients: run counts the zeros since the previous
              * nonzero level, run2 the zeros remaining before the next one. */
4189         run=0;
4190         rle_index=0;
4191         run2= run_tab[rle_index++];
4192         prev_level=0;
4193         prev_run=0;
4194
4195         for(i=start_i; i<64; i++){
4196             int j= perm_scantable[i];
4197             const int level= block[j];
4198             int change, old_coeff;
4199
                 /* Below noise-shaping level 3, look at most one position
                  * past the current last coefficient. */
4200             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4201                 break;
4202
4203             if(level){
4204                 if(level<0) old_coeff= qmul*level - qadd;
4205                 else        old_coeff= qmul*level + qadd;
4206                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4207             }else{
4208                 old_coeff=0;
4209                 run2--;
4210                 av_assert2(run2>=0 || i >= last_non_zero );
4211             }
4212
4213             for(change=-1; change<=1; change+=2){
4214                 int new_level= level + change;
4215                 int score, new_coeff, unquant_change;
4216
4217                 score=0;
                     /* Below noise-shaping level 2 magnitudes may only shrink. */
4218                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4219                    continue;
4220
4221                 if(new_level){
4222                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4223                     else            new_coeff= qmul*new_level + qadd;
4224                     if(new_coeff >= 2048 || new_coeff <= -2048)
4225                         continue;
4226                     //FIXME check for overflow
4227
4228                     if(level){
                             /* Nonzero -> nonzero: only the code of this
                              * (run, level) pair changes. */
4229                         if(level < 63 && level > -63){
4230                             if(i < last_non_zero)
4231                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4232                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4233                             else
4234                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4235                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4236                         }
4237                     }else{
                             /* Zero -> +-1: a run is split in two (or a new
                              * last coefficient is appended). */
4238                         av_assert2(FFABS(new_level)==1);
4239
4240                         if(analyze_gradient){
4241                             int g= d1[ scantable[i] ];
4242                             if(g && (g^new_level) >= 0)
4243                                 continue;
4244                         }
4245
4246                         if(i < last_non_zero){
4247                             int next_i= i + run2 + 1;
4248                             int next_level= block[ perm_scantable[next_i] ] + 64;
4249
4250                             if(next_level&(~127))
4251                                 next_level= 0;
4252
4253                             if(next_i < last_non_zero)
4254                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4255                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4256                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4257                             else
4258                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4259                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4260                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4261                         }else{
4262                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4263                             if(prev_level){
4264                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4265                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4266                             }
4267                         }
4268                     }
4269                 }else{
                         /* +-1 -> zero: two runs merge (or the previous
                          * coefficient becomes the last one). */
4270                     new_coeff=0;
4271                     av_assert2(FFABS(level)==1);
4272
4273                     if(i < last_non_zero){
4274                         int next_i= i + run2 + 1;
4275                         int next_level= block[ perm_scantable[next_i] ] + 64;
4276
4277                         if(next_level&(~127))
4278                             next_level= 0;
4279
4280                         if(next_i < last_non_zero)
4281                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4282                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4283                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4284                         else
4285                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4286                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4287                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4288                     }else{
4289                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4290                         if(prev_level){
4291                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4292                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4293                         }
4294                     }
4295                 }
4296
                     /* score holds a bit-length difference so far; weight it
                      * by lambda and add the try_8x8basis() score. */
4297                 score *= lambda;
4298
4299                 unquant_change= new_coeff - old_coeff;
4300                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4301
4302                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4303                                                    unquant_change);
4304                 if(score<best_score){
4305                     best_score= score;
4306                     best_coeff= i;
4307                     best_change= change;
4308                     best_unquant_change= unquant_change;
4309                 }
4310             }
4311             if(level){
4312                 prev_level= level + 64;
4313                 if(prev_level&(~127))
4314                     prev_level= 0;
4315                 prev_run= run;
4316                 run=0;
4317             }else{
4318                 run++;
4319             }
4320         }
4321 #ifdef REFINE_STATS
4322 STOP_TIMER("iterative step")}
4323 #endif
4324
             /* Apply the winning change: update the level and last_non_zero,
              * rebuild run_tab[] and fold the change into rem[]. */
4325         if(best_change){
4326             int j= perm_scantable[ best_coeff ];
4327
4328             block[j] += best_change;
4329
4330             if(best_coeff > last_non_zero){
4331                 last_non_zero= best_coeff;
4332                 av_assert2(block[j]);
4333 #ifdef REFINE_STATS
4334 after_last++;
4335 #endif
4336             }else{
4337 #ifdef REFINE_STATS
4338 if(block[j]){
4339     if(block[j] - best_change){
4340         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4341             raise++;
4342         }else{
4343             lower++;
4344         }
4345     }else{
4346         from_zero++;
4347     }
4348 }else{
4349     to_zero++;
4350 }
4351 #endif
4352                 for(; last_non_zero>=start_i; last_non_zero--){
4353                     if(block[perm_scantable[last_non_zero]])
4354                         break;
4355                 }
4356             }
4357 #ifdef REFINE_STATS
4358 count++;
4359 if(256*256*256*64 % count == 0){
4360     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4361 }
4362 #endif
4363             run=0;
4364             rle_index=0;
4365             for(i=start_i; i<=last_non_zero; i++){
4366                 int j= perm_scantable[i];
4367                 const int level= block[j];
4368
4369                  if(level){
4370                      run_tab[rle_index++]=run;
4371                      run=0;
4372                  }else{
4373                      run++;
4374                  }
4375             }
4376
4377             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4378         }else{
4379             break;
4380         }
4381     }
4382 #ifdef REFINE_STATS
4383 if(last_non_zero>0){
4384 STOP_TIMER("iterative search")
4385 }
4386 }
4387 #endif
4388
4389     return last_non_zero;
4390 }
4391
4392 int ff_dct_quantize_c(MpegEncContext *s,
4393                         int16_t *block, int n,
4394                         int qscale, int *overflow)
4395 {
4396     int i, j, level, last_non_zero, q, start_i;
4397     const int *qmat;
4398     const uint8_t *scantable= s->intra_scantable.scantable;
4399     int bias;
4400     int max=0;
4401     unsigned int threshold1, threshold2;
4402
4403     s->fdsp.fdct(block);
4404
4405     if(s->dct_error_sum)
4406         s->denoise_dct(s, block);
4407
4408     if (s->mb_intra) {
4409         if (!s->h263_aic) {
4410             if (n < 4)
4411                 q = s->y_dc_scale;
4412             else
4413                 q = s->c_dc_scale;
4414             q = q << 3;
4415         } else
4416             /* For AIC we skip quant/dequant of INTRADC */
4417             q = 1 << 3;
4418
4419         /* note: block[0] is assumed to be positive */
4420         block[0] = (block[0] + (q >> 1)) / q;
4421         start_i = 1;
4422         last_non_zero = 0;
4423         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4424         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4425     } else {
4426         start_i = 0;
4427         last_non_zero = -1;
4428         qmat = s->q_inter_matrix[qscale];
4429         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4430     }
4431     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4432     threshold2= (threshold1<<1);
4433     for(i=63;i>=start_i;i--) {
4434         j = scantable[i];
4435         level = block[j] * qmat[j];
4436
4437         if(((unsigned)(level+threshold1))>threshold2){
4438             last_non_zero = i;
4439             break;
4440         }else{
4441             block[j]=0;
4442         }
4443     }
4444     for(i=start_i; i<=last_non_zero; i++) {
4445         j = scantable[i];
4446         level = block[j] * qmat[j];
4447
4448 //        if(   bias+level >= (1<<QMAT_SHIFT)
4449 //           || bias-level >= (1<<QMAT_SHIFT)){
4450         if(((unsigned)(level+threshold1))>threshold2){
4451             if(level>0){
4452                 level= (bias + level)>>QMAT_SHIFT;
4453                 block[j]= level;
4454             }else{
4455                 level= (bias - level)>>QMAT_SHIFT;
4456                 block[j]= -level;
4457             }
4458             max |=level;
4459         }else{
4460             block[j]=0;
4461         }
4462     }
4463     *overflow= s->max_qcoeff < max; //overflow might have happened
4464
4465     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4466     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4467         ff_block_permute(block, s->idsp.idct_permutation,
4468                          scantable, last_non_zero);
4469
4470     return last_non_zero;
4471 }
4472
4473 #define OFFSET(x) offsetof(MpegEncContext, x)
4474 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4475 static const AVOption h263_options[] = {
4476     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4477     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4478     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4479     FF_MPV_COMMON_OPTS
4480     { NULL },
4481 };
4482
4483 static const AVClass h263_class = {
4484     .class_name = "H.263 encoder",
4485     .item_name  = av_default_item_name,
4486     .option     = h263_options,
4487     .version    = LIBAVUTIL_VERSION_INT,
4488 };
4489
4490 AVCodec ff_h263_encoder = {
4491     .name           = "h263",
4492     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4493     .type           = AVMEDIA_TYPE_VIDEO,
4494     .id             = AV_CODEC_ID_H263,
4495     .priv_data_size = sizeof(MpegEncContext),
4496     .init           = ff_MPV_encode_init,
4497     .encode2        = ff_MPV_encode_picture,
4498     .close          = ff_MPV_encode_end,
4499     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4500     .priv_class     = &h263_class,
4501 };
4502
4503 static const AVOption h263p_options[] = {
4504     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4505     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4506     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4507     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4508     FF_MPV_COMMON_OPTS
4509     { NULL },
4510 };
4511 static const AVClass h263p_class = {
4512     .class_name = "H.263p encoder",
4513     .item_name  = av_default_item_name,
4514     .option     = h263p_options,
4515     .version    = LIBAVUTIL_VERSION_INT,
4516 };
4517
4518 AVCodec ff_h263p_encoder = {
4519     .name           = "h263p",
4520     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4521     .type           = AVMEDIA_TYPE_VIDEO,
4522     .id             = AV_CODEC_ID_H263P,
4523     .priv_data_size = sizeof(MpegEncContext),
4524     .init           = ff_MPV_encode_init,
4525     .encode2        = ff_MPV_encode_picture,
4526     .close          = ff_MPV_encode_end,
4527     .capabilities   = CODEC_CAP_SLICE_THREADS,
4528     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4529     .priv_class     = &h263p_class,
4530 };
4531
4532 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4533
4534 AVCodec ff_msmpeg4v2_encoder = {
4535     .name           = "msmpeg4v2",
4536     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4537     .type           = AVMEDIA_TYPE_VIDEO,
4538     .id             = AV_CODEC_ID_MSMPEG4V2,
4539     .priv_data_size = sizeof(MpegEncContext),
4540     .init           = ff_MPV_encode_init,
4541     .encode2        = ff_MPV_encode_picture,
4542     .close          = ff_MPV_encode_end,
4543     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4544     .priv_class     = &msmpeg4v2_class,
4545 };
4546
4547 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4548
4549 AVCodec ff_msmpeg4v3_encoder = {
4550     .name           = "msmpeg4",
4551     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4552     .type           = AVMEDIA_TYPE_VIDEO,
4553     .id             = AV_CODEC_ID_MSMPEG4V3,
4554     .priv_data_size = sizeof(MpegEncContext),
4555     .init           = ff_MPV_encode_init,
4556     .encode2        = ff_MPV_encode_picture,
4557     .close          = ff_MPV_encode_end,
4558     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4559     .priv_class     = &msmpeg4v3_class,
4560 };
4561
4562 FF_MPV_GENERIC_CLASS(wmv1)
4563
4564 AVCodec ff_wmv1_encoder = {
4565     .name           = "wmv1",
4566     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4567     .type           = AVMEDIA_TYPE_VIDEO,
4568     .id             = AV_CODEC_ID_WMV1,
4569     .priv_data_size = sizeof(MpegEncContext),
4570     .init           = ff_MPV_encode_init,
4571     .encode2        = ff_MPV_encode_picture,
4572     .close          = ff_MPV_encode_end,
4573     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4574     .priv_class     = &wmv1_class,
4575 };