git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "dsputil.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "h261.h"
  44 #include "h263.h"
  45 #include "mathops.h"
  46 #include "mpegutils.h"
  47 #include "mjpegenc.h"
  48 #include "msmpeg4.h"
  49 #include "qpeldsp.h"
  50 #include "faandct.h"
  51 #include "thread.h"
  52 #include "aandcttab.h"
  53 #include "flv.h"
  54 #include "mpeg4video.h"
  55 #include "internal.h"
  56 #include "bytestream.h"
  57 #include <limits.h>
  58 #include "sp5x.h"
  59
/* Prototypes for file-local (static) helpers. ff_dct_encode_init() installs
 * denoise_dct_c and dct_quantize_trellis_c as function pointers; the
 * definitions themselves are not part of this section of the file. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
  65
/* Shared default table that MPV_encode_defaults() installs as
 * s->me.mv_penalty. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
/* Shared default table that MPV_encode_defaults() installs as s->fcode_tab,
 * after setting the 32 entries at indices [MAX_MV - 16, MAX_MV + 16) to 1. */
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
  68
/* Option table made of the FF_MPV_COMMON_OPTS entries followed by a
 * { NULL } terminator entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
  73
  74 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
  75                        uint16_t (*qmat16)[2][64],
  76                        const uint16_t *quant_matrix,
  77                        int bias, int qmin, int qmax, int intra)
  78 {
  79     int qscale;
  80     int shift = 0;
  81
  82     for (qscale = qmin; qscale <= qmax; qscale++) {
  83         int i;
  84         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
  85             dsp->fdct == ff_jpeg_fdct_islow_10 ||
  86             dsp->fdct == ff_faandct) {
  87             for (i = 0; i < 64; i++) {
  88                 const int j = dsp->idct_permutation[i];
  89                 /* 16 <= qscale * quant_matrix[i] <= 7905
  90                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
  91                  *             19952 <=              x  <= 249205026
  92                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
  93                  *           3444240 >= (1 << 36) / (x) >= 275 */
  94
  95                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
  96                                         (qscale * quant_matrix[j]));
  97             }
  98         } else if (dsp->fdct == ff_fdct_ifast) {
  99             for (i = 0; i < 64; i++) {
 100                 const int j = dsp->idct_permutation[i];
 101                 /* 16 <= qscale * quant_matrix[i] <= 7905
 102                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 103                  *             19952 <=              x  <= 249205026
 104                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 105                  *           3444240 >= (1 << 36) / (x) >= 275 */
 106
 107                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
 108                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
 109             }
 110         } else {
 111             for (i = 0; i < 64; i++) {
 112                 const int j = dsp->idct_permutation[i];
 113                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 114                  * Assume x = qscale * quant_matrix[i]
 115                  * So             16 <=              x  <= 7905
 116                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 117                  * so          32768 >= (1 << 19) / (x) >= 67 */
 118                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
 119                                         (qscale * quant_matrix[j]));
 120                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 121                 //                    (qscale * quant_matrix[i]);
 122                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
 123                                        (qscale * quant_matrix[j]);
 124
 125                 if (qmat16[qscale][0][i] == 0 ||
 126                     qmat16[qscale][0][i] == 128 * 256)
 127                     qmat16[qscale][0][i] = 128 * 256 - 1;
 128                 qmat16[qscale][1][i] =
 129                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 130                                 qmat16[qscale][0][i]);
 131             }
 132         }
 133
 134         for (i = intra; i < 64; i++) {
 135             int64_t max = 8191;
 136             if (dsp->fdct == ff_fdct_ifast) {
 137                 max = (8191LL * ff_aanscales[i]) >> 14;
 138             }
 139             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 140                 shift++;
 141             }
 142         }
 143     }
 144     if (shift) {
 145         av_log(NULL, AV_LOG_INFO,
 146                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 147                QMAT_SHIFT - shift);
 148     }
 149 }
 150
 151 static inline void update_qscale(MpegEncContext *s)
 152 {
 153     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 154                 (FF_LAMBDA_SHIFT + 7);
 155     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 156
 157     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 158                  FF_LAMBDA_SHIFT;
 159 }
 160
 161 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 162 {
 163     int i;
 164
 165     if (matrix) {
 166         put_bits(pb, 1, 1);
 167         for (i = 0; i < 64; i++) {
 168             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 169         }
 170     } else
 171         put_bits(pb, 1, 0);
 172 }
 173
 174 /**
 175  * init s->current_picture.qscale_table from s->lambda_table
 176  */
 177 void ff_init_qscale_tab(MpegEncContext *s)
 178 {
 179     int8_t * const qscale_table = s->current_picture.qscale_table;
 180     int i;
 181
 182     for (i = 0; i < s->mb_num; i++) {
 183         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 184         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 185         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 186                                                   s->avctx->qmax);
 187     }
 188 }
 189
 190 static void update_duplicate_context_after_me(MpegEncContext *dst,
 191                                               MpegEncContext *src)
 192 {
 193 #define COPY(a) dst->a= src->a
 194     COPY(pict_type);
 195     COPY(current_picture);
 196     COPY(f_code);
 197     COPY(b_code);
 198     COPY(qscale);
 199     COPY(lambda);
 200     COPY(lambda2);
 201     COPY(picture_in_gop_number);
 202     COPY(gop_picture_number);
 203     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 204     COPY(progressive_frame);    // FIXME don't set in encode_header
 205     COPY(partitioned_frame);    // FIXME don't set in encode_header
 206 #undef COPY
 207 }
 208
 209 /**
 210  * Set the given MpegEncContext to defaults for encoding.
 211  * the changed fields will not depend upon the prior state of the MpegEncContext.
 212  */
 213 static void MPV_encode_defaults(MpegEncContext *s)
 214 {
 215     int i;
 216     ff_MPV_common_defaults(s);
 217
 218     for (i = -16; i < 16; i++) {
 219         default_fcode_tab[i + MAX_MV] = 1;
 220     }
 221     s->me.mv_penalty = default_mv_penalty;
 222     s->fcode_tab     = default_fcode_tab;
 223
 224     s->input_picture_number  = 0;
 225     s->picture_in_gop_number = 0;
 226 }
 227
 228 av_cold int ff_dct_encode_init(MpegEncContext *s) {
 229     if (ARCH_X86)
 230         ff_dct_encode_init_x86(s);
 231
 232     if (CONFIG_H263_ENCODER)
 233         ff_h263dsp_init(&s->h263dsp);
 234     if (!s->dct_quantize)
 235         s->dct_quantize = ff_dct_quantize_c;
 236     if (!s->denoise_dct)
 237         s->denoise_dct  = denoise_dct_c;
 238     s->fast_dct_quantize = s->dct_quantize;
 239     if (s->avctx->trellis)
 240         s->dct_quantize  = dct_quantize_trellis_c;
 241
 242     return 0;
 243 }
 244
 245 /* init video encoder */
 246 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
 247 {
 248     MpegEncContext *s = avctx->priv_data;
 249     int i, ret;
 250
 251     MPV_encode_defaults(s);
 252
 253     switch (avctx->codec_id) {
 254     case AV_CODEC_ID_MPEG2VIDEO:
 255         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 256             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 257             av_log(avctx, AV_LOG_ERROR,
 258                    "only YUV420 and YUV422 are supported\n");
 259             return -1;
 260         }
 261         break;
 262     case AV_CODEC_ID_MJPEG:
 263     case AV_CODEC_ID_AMV:
 264         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
 265             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
 266             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
 267             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 268               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
 269               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
 270              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
 271             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 272             return -1;
 273         }
 274         break;
 275     default:
 276         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 277             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 278             return -1;
 279         }
 280     }
 281
 282     switch (avctx->pix_fmt) {
 283     case AV_PIX_FMT_YUVJ444P:
 284     case AV_PIX_FMT_YUV444P:
 285         s->chroma_format = CHROMA_444;
 286         break;
 287     case AV_PIX_FMT_YUVJ422P:
 288     case AV_PIX_FMT_YUV422P:
 289         s->chroma_format = CHROMA_422;
 290         break;
 291     case AV_PIX_FMT_YUVJ420P:
 292     case AV_PIX_FMT_YUV420P:
 293     default:
 294         s->chroma_format = CHROMA_420;
 295         break;
 296     }
 297
 298     s->bit_rate = avctx->bit_rate;
 299     s->width    = avctx->width;
 300     s->height   = avctx->height;
 301     if (avctx->gop_size > 600 &&
 302         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 303         av_log(avctx, AV_LOG_WARNING,
 304                "keyframe interval too large!, reducing it from %d to %d\n",
 305                avctx->gop_size, 600);
 306         avctx->gop_size = 600;
 307     }
 308     s->gop_size     = avctx->gop_size;
 309     s->avctx        = avctx;
 310     s->flags        = avctx->flags;
 311     s->flags2       = avctx->flags2;
 312     if (avctx->max_b_frames > MAX_B_FRAMES) {
 313         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 314                "is %d.\n", MAX_B_FRAMES);
 315         avctx->max_b_frames = MAX_B_FRAMES;
 316     }
 317     s->max_b_frames = avctx->max_b_frames;
 318     s->codec_id     = avctx->codec->id;
 319     s->strict_std_compliance = avctx->strict_std_compliance;
 320     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 321     s->mpeg_quant         = avctx->mpeg_quant;
 322     s->rtp_mode           = !!avctx->rtp_payload_size;
 323     s->intra_dc_precision = avctx->intra_dc_precision;
 324     s->user_specified_pts = AV_NOPTS_VALUE;
 325
 326     if (s->gop_size <= 1) {
 327         s->intra_only = 1;
 328         s->gop_size   = 12;
 329     } else {
 330         s->intra_only = 0;
 331     }
 332
 333     s->me_method = avctx->me_method;
 334
 335     /* Fixed QSCALE */
 336     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 337
 338     s->adaptive_quant = (s->avctx->lumi_masking ||
 339                          s->avctx->dark_masking ||
 340                          s->avctx->temporal_cplx_masking ||
 341                          s->avctx->spatial_cplx_masking  ||
 342                          s->avctx->p_masking      ||
 343                          s->avctx->border_masking ||
 344                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 345                         !s->fixed_qscale;
 346
 347     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
 348
 349     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 350         switch(avctx->codec_id) {
 351         case AV_CODEC_ID_MPEG1VIDEO:
 352         case AV_CODEC_ID_MPEG2VIDEO:
 353             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
 354             break;
 355         case AV_CODEC_ID_MPEG4:
 356         case AV_CODEC_ID_MSMPEG4V1:
 357         case AV_CODEC_ID_MSMPEG4V2:
 358         case AV_CODEC_ID_MSMPEG4V3:
 359             if       (avctx->rc_max_rate >= 15000000) {
 360                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
 361             } else if(avctx->rc_max_rate >=  2000000) {
 362                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
 363             } else if(avctx->rc_max_rate >=   384000) {
 364                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
 365             } else
 366                 avctx->rc_buffer_size = 40;
 367             avctx->rc_buffer_size *= 16384;
 368             break;
 369         }
 370         if (avctx->rc_buffer_size) {
 371             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
 372         }
 373     }
 374
 375     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
 376         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
 377         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
 378             return -1;
 379     }
 380
 381     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 382         av_log(avctx, AV_LOG_INFO,
 383                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 384     }
 385
 386     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 387         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 388         return -1;
 389     }
 390
 391     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 392         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
 393         return -1;
 394     }
 395
 396     if (avctx->rc_max_rate &&
 397         avctx->rc_max_rate == avctx->bit_rate &&
 398         avctx->rc_max_rate != avctx->rc_min_rate) {
 399         av_log(avctx, AV_LOG_INFO,
 400                "impossible bitrate constraints, this will fail\n");
 401     }
 402
 403     if (avctx->rc_buffer_size &&
 404         avctx->bit_rate * (int64_t)avctx->time_base.num >
 405             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 406         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 407         return -1;
 408     }
 409
 410     if (!s->fixed_qscale &&
 411         avctx->bit_rate * av_q2d(avctx->time_base) >
 412             avctx->bit_rate_tolerance) {
 413         av_log(avctx, AV_LOG_WARNING,
 414                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
 415         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
 416     }
 417
 418     if (s->avctx->rc_max_rate &&
 419         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 420         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 421          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 422         90000LL * (avctx->rc_buffer_size - 1) >
 423             s->avctx->rc_max_rate * 0xFFFFLL) {
 424         av_log(avctx, AV_LOG_INFO,
 425                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 426                "specified vbv buffer is too large for the given bitrate!\n");
 427     }
 428
 429     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
 430         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 431         s->codec_id != AV_CODEC_ID_FLV1) {
 432         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 433         return -1;
 434     }
 435
 436     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 437         av_log(avctx, AV_LOG_ERROR,
 438                "OBMC is only supported with simple mb decision\n");
 439         return -1;
 440     }
 441
 442     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 443         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 444         return -1;
 445     }
 446
 447     if (s->max_b_frames                    &&
 448         s->codec_id != AV_CODEC_ID_MPEG4      &&
 449         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 450         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 451         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 452         return -1;
 453     }
 454     if (s->max_b_frames < 0) {
 455         av_log(avctx, AV_LOG_ERROR,
 456                "max b frames must be 0 or positive for mpegvideo based encoders\n");
 457         return -1;
 458     }
 459
 460     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 461          s->codec_id == AV_CODEC_ID_H263  ||
 462          s->codec_id == AV_CODEC_ID_H263P) &&
 463         (avctx->sample_aspect_ratio.num > 255 ||
 464          avctx->sample_aspect_ratio.den > 255)) {
 465         av_log(avctx, AV_LOG_WARNING,
 466                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
 467                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 468         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
 469                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
 470     }
 471
 472     if ((s->codec_id == AV_CODEC_ID_H263  ||
 473          s->codec_id == AV_CODEC_ID_H263P) &&
 474         (avctx->width  > 2048 ||
 475          avctx->height > 1152 )) {
 476         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
 477         return -1;
 478     }
 479     if ((s->codec_id == AV_CODEC_ID_H263  ||
 480          s->codec_id == AV_CODEC_ID_H263P) &&
 481         ((avctx->width &3) ||
 482          (avctx->height&3) )) {
 483         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
 484         return -1;
 485     }
 486
 487     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
 488         (avctx->width  > 4095 ||
 489          avctx->height > 4095 )) {
 490         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
 491         return -1;
 492     }
 493
 494     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
 495         (avctx->width  > 16383 ||
 496          avctx->height > 16383 )) {
 497         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
 498         return -1;
 499     }
 500
 501     if (s->codec_id == AV_CODEC_ID_RV10 &&
 502         (avctx->width &15 ||
 503          avctx->height&15 )) {
 504         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
 505         return AVERROR(EINVAL);
 506     }
 507
 508     if (s->codec_id == AV_CODEC_ID_RV20 &&
 509         (avctx->width &3 ||
 510          avctx->height&3 )) {
 511         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
 512         return AVERROR(EINVAL);
 513     }
 514
 515     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
 516          s->codec_id == AV_CODEC_ID_WMV2) &&
 517          avctx->width & 1) {
 518          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
 519          return -1;
 520     }
 521
 522     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 523         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 524         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 525         return -1;
 526     }
 527
 528     // FIXME mpeg2 uses that too
 529     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
 530                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
 531         av_log(avctx, AV_LOG_ERROR,
 532                "mpeg2 style quantization not supported by codec\n");
 533         return -1;
 534     }
 535
 536     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 537         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 538         return -1;
 539     }
 540
 541     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 542         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 543         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 544         return -1;
 545     }
 546
 547     if (s->avctx->scenechange_threshold < 1000000000 &&
 548         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
 549         av_log(avctx, AV_LOG_ERROR,
 550                "closed gop with scene change detection are not supported yet, "
 551                "set threshold to 1000000000\n");
 552         return -1;
 553     }
 554
 555     if (s->flags & CODEC_FLAG_LOW_DELAY) {
 556         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 557             av_log(avctx, AV_LOG_ERROR,
 558                   "low delay forcing is only available for mpeg2\n");
 559             return -1;
 560         }
 561         if (s->max_b_frames != 0) {
 562             av_log(avctx, AV_LOG_ERROR,
 563                    "b frames cannot be used with low delay\n");
 564             return -1;
 565         }
 566     }
 567
 568     if (s->q_scale_type == 1) {
 569         if (avctx->qmax > 12) {
 570             av_log(avctx, AV_LOG_ERROR,
 571                    "non linear quant only supports qmax <= 12 currently\n");
 572             return -1;
 573         }
 574     }
 575
 576     if (s->avctx->thread_count > 1         &&
 577         s->codec_id != AV_CODEC_ID_MPEG4      &&
 578         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 579         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 580         s->codec_id != AV_CODEC_ID_MJPEG      &&
 581         (s->codec_id != AV_CODEC_ID_H263P)) {
 582         av_log(avctx, AV_LOG_ERROR,
 583                "multi threaded encoding not supported by codec\n");
 584         return -1;
 585     }
 586
 587     if (s->avctx->thread_count < 1) {
 588         av_log(avctx, AV_LOG_ERROR,
 589                "automatic thread number detection not supported by codec, "
 590                "patch welcome\n");
 591         return -1;
 592     }
 593
 594     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
 595         s->rtp_mode = 1;
 596
 597     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
 598         s->h263_slice_structured = 1;
 599
 600     if (!avctx->time_base.den || !avctx->time_base.num) {
 601         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 602         return -1;
 603     }
 604
 605     i = (INT_MAX / 2 + 128) >> 8;
 606     if (avctx->mb_threshold >= i) {
 607         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
 608                i - 1);
 609         return -1;
 610     }
 611
 612     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 613         av_log(avctx, AV_LOG_INFO,
 614                "notice: b_frame_strategy only affects the first pass\n");
 615         avctx->b_frame_strategy = 0;
 616     }
 617
 618     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 619     if (i > 1) {
 620         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 621         avctx->time_base.den /= i;
 622         avctx->time_base.num /= i;
 623         //return -1;
 624     }
 625
 626     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
 627         // (a + x * 3 / 8) / x
 628         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 629         s->inter_quant_bias = 0;
 630     } else {
 631         s->intra_quant_bias = 0;
 632         // (a - x / 4) / x
 633         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 634     }
 635
 636     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
 637         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
 638         return AVERROR(EINVAL);
 639     }
 640
 641     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 642         s->intra_quant_bias = avctx->intra_quant_bias;
 643     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 644         s->inter_quant_bias = avctx->inter_quant_bias;
 645
 646     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
 647
 648     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 649         s->avctx->time_base.den > (1 << 16) - 1) {
 650         av_log(avctx, AV_LOG_ERROR,
 651                "timebase %d/%d not supported by MPEG 4 standard, "
 652                "the maximum admitted value for the timebase denominator "
 653                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 654                (1 << 16) - 1);
 655         return -1;
 656     }
 657     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 658
 659     switch (avctx->codec->id) {
 660     case AV_CODEC_ID_MPEG1VIDEO:
 661         s->out_format = FMT_MPEG1;
 662         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 663         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 664         break;
 665     case AV_CODEC_ID_MPEG2VIDEO:
 666         s->out_format = FMT_MPEG1;
 667         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 668         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 669         s->rtp_mode   = 1;
 670         break;
 671     case AV_CODEC_ID_MJPEG:
 672     case AV_CODEC_ID_AMV:
 673         s->out_format = FMT_MJPEG;
 674         s->intra_only = 1; /* force intra only for jpeg */
 675         if (!CONFIG_MJPEG_ENCODER ||
 676             ff_mjpeg_encode_init(s) < 0)
 677             return -1;
 678         avctx->delay = 0;
 679         s->low_delay = 1;
 680         break;
 681     case AV_CODEC_ID_H261:
 682         if (!CONFIG_H261_ENCODER)
 683             return -1;
 684         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 685             av_log(avctx, AV_LOG_ERROR,
 686                    "The specified picture size of %dx%d is not valid for the "
 687                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 688                     s->width, s->height);
 689             return -1;
 690         }
 691         s->out_format = FMT_H261;
 692         avctx->delay  = 0;
 693         s->low_delay  = 1;
 694         break;
 695     case AV_CODEC_ID_H263:
 696         if (!CONFIG_H263_ENCODER)
 697             return -1;
 698         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 699                              s->width, s->height) == 8) {
 700             av_log(avctx, AV_LOG_ERROR,
 701                    "The specified picture size of %dx%d is not valid for "
 702                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 703                    "352x288, 704x576, and 1408x1152. "
 704                    "Try H.263+.\n", s->width, s->height);
 705             return -1;
 706         }
 707         s->out_format = FMT_H263;
 708         avctx->delay  = 0;
 709         s->low_delay  = 1;
 710         break;
 711     case AV_CODEC_ID_H263P:
 712         s->out_format = FMT_H263;
 713         s->h263_plus  = 1;
 714         /* Fx */
 715         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 716         s->modified_quant  = s->h263_aic;
 717         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 718         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 719
 720         /* /Fx */
 721         /* These are just to be sure */
 722         avctx->delay = 0;
 723         s->low_delay = 1;
 724         break;
 725     case AV_CODEC_ID_FLV1:
 726         s->out_format      = FMT_H263;
 727         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 728         s->unrestricted_mv = 1;
 729         s->rtp_mode  = 0; /* don't allow GOB */
 730         avctx->delay = 0;
 731         s->low_delay = 1;
 732         break;
 733     case AV_CODEC_ID_RV10:
 734         s->out_format = FMT_H263;
 735         avctx->delay  = 0;
 736         s->low_delay  = 1;
 737         break;
 738     case AV_CODEC_ID_RV20:
 739         s->out_format      = FMT_H263;
 740         avctx->delay       = 0;
 741         s->low_delay       = 1;
 742         s->modified_quant  = 1;
 743         s->h263_aic        = 1;
 744         s->h263_plus       = 1;
 745         s->loop_filter     = 1;
 746         s->unrestricted_mv = 0;
 747         break;
 748     case AV_CODEC_ID_MPEG4:
 749         s->out_format      = FMT_H263;
 750         s->h263_pred       = 1;
 751         s->unrestricted_mv = 1;
 752         s->low_delay       = s->max_b_frames ? 0 : 1;
 753         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 754         break;
 755     case AV_CODEC_ID_MSMPEG4V2:
 756         s->out_format      = FMT_H263;
 757         s->h263_pred       = 1;
 758         s->unrestricted_mv = 1;
 759         s->msmpeg4_version = 2;
 760         avctx->delay       = 0;
 761         s->low_delay       = 1;
 762         break;
 763     case AV_CODEC_ID_MSMPEG4V3:
 764         s->out_format        = FMT_H263;
 765         s->h263_pred         = 1;
 766         s->unrestricted_mv   = 1;
 767         s->msmpeg4_version   = 3;
 768         s->flipflop_rounding = 1;
 769         avctx->delay         = 0;
 770         s->low_delay         = 1;
 771         break;
 772     case AV_CODEC_ID_WMV1:
 773         s->out_format        = FMT_H263;
 774         s->h263_pred         = 1;
 775         s->unrestricted_mv   = 1;
 776         s->msmpeg4_version   = 4;
 777         s->flipflop_rounding = 1;
 778         avctx->delay         = 0;
 779         s->low_delay         = 1;
 780         break;
 781     case AV_CODEC_ID_WMV2:
 782         s->out_format        = FMT_H263;
 783         s->h263_pred         = 1;
 784         s->unrestricted_mv   = 1;
 785         s->msmpeg4_version   = 5;
 786         s->flipflop_rounding = 1;
 787         avctx->delay         = 0;
 788         s->low_delay         = 1;
 789         break;
 790     default:
 791         return -1;
 792     }
 793
 794     avctx->has_b_frames = !s->low_delay;
 795
 796     s->encoding = 1;
 797
 798     s->progressive_frame    =
 799     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 800                                                 CODEC_FLAG_INTERLACED_ME) ||
 801                                 s->alternate_scan);
 802
 803     /* init */
 804     if (ff_MPV_common_init(s) < 0)
 805         return -1;
 806
 807     ff_qpeldsp_init(&s->qdsp);
 808
 809     s->avctx->coded_frame = s->current_picture.f;
 810
 811     if (s->msmpeg4_version) {
 812         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 813                           2 * 2 * (MAX_LEVEL + 1) *
 814                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 815     }
 816     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 817
 818     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 819     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
 820     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 821     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 822     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 823     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 824     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 825                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 826     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 827                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 828
 829     if (s->avctx->noise_reduction) {
 830         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 831                           2 * 64 * sizeof(uint16_t), fail);
 832     }
 833
 834     ff_dct_encode_init(s);
 835
 836     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 837         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 838
 839     s->quant_precision = 5;
 840
 841     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
 842     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
 843
 844     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 845         ff_h261_encode_init(s);
 846     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 847         ff_h263_encode_init(s);
 848     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 849         ff_msmpeg4_encode_init(s);
 850     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 851         && s->out_format == FMT_MPEG1)
 852         ff_mpeg1_encode_init(s);
 853
 854     /* init q matrix */
 855     for (i = 0; i < 64; i++) {
 856         int j = s->dsp.idct_permutation[i];
 857         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 858             s->mpeg_quant) {
 859             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 860             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 861         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 862             s->intra_matrix[j] =
 863             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 864         } else {
 865             /* mpeg1/2 */
 866             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 867             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 868         }
 869         if (s->avctx->intra_matrix)
 870             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 871         if (s->avctx->inter_matrix)
 872             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 873     }
 874
 875     /* precompute matrix */
 876     /* for mjpeg, we do include qscale in the matrix */
 877     if (s->out_format != FMT_MJPEG) {
 878         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
 879                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 880                           31, 1);
 881         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
 882                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 883                           31, 0);
 884     }
 885
 886     if (ff_rate_control_init(s) < 0)
 887         return -1;
 888
 889 #if FF_API_ERROR_RATE
 890     FF_DISABLE_DEPRECATION_WARNINGS
 891     if (avctx->error_rate)
 892         s->error_rate = avctx->error_rate;
 893     FF_ENABLE_DEPRECATION_WARNINGS;
 894 #endif
 895
 896 #if FF_API_NORMALIZE_AQP
 897     FF_DISABLE_DEPRECATION_WARNINGS
 898     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 899         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 900     FF_ENABLE_DEPRECATION_WARNINGS;
 901 #endif
 902
 903 #if FF_API_MV0
 904     FF_DISABLE_DEPRECATION_WARNINGS
 905     if (avctx->flags & CODEC_FLAG_MV0)
 906         s->mpv_flags |= FF_MPV_FLAG_MV0;
 907     FF_ENABLE_DEPRECATION_WARNINGS
 908 #endif
 909
 910     if (avctx->b_frame_strategy == 2) {
 911         for (i = 0; i < s->max_b_frames + 2; i++) {
 912             s->tmp_frames[i] = av_frame_alloc();
 913             if (!s->tmp_frames[i])
 914                 return AVERROR(ENOMEM);
 915
 916             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 917             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
 918             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
 919
 920             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 921             if (ret < 0)
 922                 return ret;
 923         }
 924     }
 925
 926     return 0;
 927 fail:
 928     ff_MPV_encode_end(avctx);
 929     return AVERROR_UNKNOWN;
 930 }
 931
 932 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
 933 {
 934     MpegEncContext *s = avctx->priv_data;
 935     int i;
 936
 937     ff_rate_control_uninit(s);
 938
 939     ff_MPV_common_end(s);
 940     if (CONFIG_MJPEG_ENCODER &&
 941         s->out_format == FMT_MJPEG)
 942         ff_mjpeg_encode_close(s);
 943
 944     av_freep(&avctx->extradata);
 945
 946     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
 947         av_frame_free(&s->tmp_frames[i]);
 948
 949     ff_free_picture_tables(&s->new_picture);
 950     ff_mpeg_unref_picture(s, &s->new_picture);
 951
 952     av_freep(&s->avctx->stats_out);
 953     av_freep(&s->ac_stats);
 954
 955     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
 956     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
 957     s->q_chroma_intra_matrix=   NULL;
 958     s->q_chroma_intra_matrix16= NULL;
 959     av_freep(&s->q_intra_matrix);
 960     av_freep(&s->q_inter_matrix);
 961     av_freep(&s->q_intra_matrix16);
 962     av_freep(&s->q_inter_matrix16);
 963     av_freep(&s->input_picture);
 964     av_freep(&s->reordered_input_picture);
 965     av_freep(&s->dct_offset);
 966
 967     return 0;
 968 }
 969
 970 static int get_sae(uint8_t *src, int ref, int stride)
 971 {
 972     int x,y;
 973     int acc = 0;
 974
 975     for (y = 0; y < 16; y++) {
 976         for (x = 0; x < 16; x++) {
 977             acc += FFABS(src[x + y * stride] - ref);
 978         }
 979     }
 980
 981     return acc;
 982 }
 983
 984 static int get_intra_count(MpegEncContext *s, uint8_t *src,
 985                            uint8_t *ref, int stride)
 986 {
 987     int x, y, w, h;
 988     int acc = 0;
 989
 990     w = s->width  & ~15;
 991     h = s->height & ~15;
 992
 993     for (y = 0; y < h; y += 16) {
 994         for (x = 0; x < w; x += 16) {
 995             int offset = x + y * stride;
 996             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
 997                                      16);
 998             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
 999             int sae  = get_sae(src + offset, mean, stride);
1000
1001             acc += sae + 500 < sad;
1002         }
1003     }
1004     return acc;
1005 }
1006
1007
1008 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1009 {
1010     Picture *pic = NULL;
1011     int64_t pts;
1012     int i, display_picture_number = 0, ret;
1013     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1014                                                  (s->low_delay ? 0 : 1);
1015     int direct = 1;
1016
1017     if (pic_arg) {
1018         pts = pic_arg->pts;
1019         display_picture_number = s->input_picture_number++;
1020
1021         if (pts != AV_NOPTS_VALUE) {
1022             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1023                 int64_t last = s->user_specified_pts;
1024
1025                 if (pts <= last) {
1026                     av_log(s->avctx, AV_LOG_ERROR,
1027                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1028                            pts, last);
1029                     return AVERROR(EINVAL);
1030                 }
1031
1032                 if (!s->low_delay && display_picture_number == 1)
1033                     s->dts_delta = pts - last;
1034             }
1035             s->user_specified_pts = pts;
1036         } else {
1037             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1038                 s->user_specified_pts =
1039                 pts = s->user_specified_pts + 1;
1040                 av_log(s->avctx, AV_LOG_INFO,
1041                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1042                        pts);
1043             } else {
1044                 pts = display_picture_number;
1045             }
1046         }
1047     }
1048
1049     if (pic_arg) {
1050         if (!pic_arg->buf[0])
1051             direct = 0;
1052         if (pic_arg->linesize[0] != s->linesize)
1053             direct = 0;
1054         if (pic_arg->linesize[1] != s->uvlinesize)
1055             direct = 0;
1056         if (pic_arg->linesize[2] != s->uvlinesize)
1057             direct = 0;
1058         if ((s->width & 15) || (s->height & 15))
1059             direct = 0;
1060         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1061             direct = 0;
1062         if (s->linesize & (STRIDE_ALIGN-1))
1063             direct = 0;
1064
1065         av_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1066                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1067
1068         if (direct) {
1069             i = ff_find_unused_picture(s, 1);
1070             if (i < 0)
1071                 return i;
1072
1073             pic = &s->picture[i];
1074             pic->reference = 3;
1075
1076             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1077                 return ret;
1078             if (ff_alloc_picture(s, pic, 1) < 0) {
1079                 return -1;
1080             }
1081         } else {
1082             i = ff_find_unused_picture(s, 0);
1083             if (i < 0)
1084                 return i;
1085
1086             pic = &s->picture[i];
1087             pic->reference = 3;
1088
1089             if (ff_alloc_picture(s, pic, 0) < 0) {
1090                 return -1;
1091             }
1092
1093             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1094                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1095                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1096                 // empty
1097             } else {
1098                 int h_chroma_shift, v_chroma_shift;
1099                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1100                                                  &h_chroma_shift,
1101                                                  &v_chroma_shift);
1102
1103                 for (i = 0; i < 3; i++) {
1104                     int src_stride = pic_arg->linesize[i];
1105                     int dst_stride = i ? s->uvlinesize : s->linesize;
1106                     int h_shift = i ? h_chroma_shift : 0;
1107                     int v_shift = i ? v_chroma_shift : 0;
1108                     int w = s->width  >> h_shift;
1109                     int h = s->height >> v_shift;
1110                     uint8_t *src = pic_arg->data[i];
1111                     uint8_t *dst = pic->f->data[i];
1112
1113                     if (s->codec_id == AV_CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1114                         h = ((s->height + 15)/16*16) >> v_shift;
1115                     }
1116
1117                     if (!s->avctx->rc_buffer_size)
1118                         dst += INPLACE_OFFSET;
1119
1120                     if (src_stride == dst_stride)
1121                         memcpy(dst, src, src_stride * h);
1122                     else {
1123                         int h2 = h;
1124                         uint8_t *dst2 = dst;
1125                         while (h2--) {
1126                             memcpy(dst2, src, w);
1127                             dst2 += dst_stride;
1128                             src += src_stride;
1129                         }
1130                     }
1131                     if ((s->width & 15) || (s->height & 15)) {
1132                         s->dsp.draw_edges(dst, dst_stride,
1133                                           w, h,
1134                                           16>>h_shift,
1135                                           16>>v_shift,
1136                                           EDGE_BOTTOM);
1137                     }
1138                 }
1139             }
1140         }
1141         ret = av_frame_copy_props(pic->f, pic_arg);
1142         if (ret < 0)
1143             return ret;
1144
1145         pic->f->display_picture_number = display_picture_number;
1146         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1147     }
1148
1149     /* shift buffer entries */
1150     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1151         s->input_picture[i - 1] = s->input_picture[i];
1152
1153     s->input_picture[encoding_delay] = (Picture*) pic;
1154
1155     return 0;
1156 }
1157
1158 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1159 {
1160     int x, y, plane;
1161     int score = 0;
1162     int64_t score64 = 0;
1163
1164     for (plane = 0; plane < 3; plane++) {
1165         const int stride = p->f->linesize[plane];
1166         const int bw = plane ? 1 : 2;
1167         for (y = 0; y < s->mb_height * bw; y++) {
1168             for (x = 0; x < s->mb_width * bw; x++) {
1169                 int off = p->shared ? 0 : 16;
1170                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1171                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1172                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1173
1174                 switch (FFABS(s->avctx->frame_skip_exp)) {
1175                 case 0: score    =  FFMAX(score, v);          break;
1176                 case 1: score   += FFABS(v);                  break;
1177                 case 2: score64 += v * (int64_t)v;                       break;
1178                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1179                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1180                 }
1181             }
1182         }
1183     }
1184     emms_c();
1185
1186     if (score)
1187         score64 = score;
1188     if (s->avctx->frame_skip_exp < 0)
1189         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1190                       -1.0/s->avctx->frame_skip_exp);
1191
1192     if (score64 < s->avctx->frame_skip_threshold)
1193         return 1;
1194     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1195         return 1;
1196     return 0;
1197 }
1198
1199 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1200 {
1201     AVPacket pkt = { 0 };
1202     int ret, got_output;
1203
1204     av_init_packet(&pkt);
1205     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1206     if (ret < 0)
1207         return ret;
1208
1209     ret = pkt.size;
1210     av_free_packet(&pkt);
1211     return ret;
1212 }
1213
1214 static int estimate_best_b_count(MpegEncContext *s)
1215 {
1216     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1217     AVCodecContext *c = avcodec_alloc_context3(NULL);
1218     const int scale = s->avctx->brd_scale;
1219     int i, j, out_size, p_lambda, b_lambda, lambda2;
1220     int64_t best_rd  = INT64_MAX;
1221     int best_b_count = -1;
1222
1223     av_assert0(scale >= 0 && scale <= 3);
1224
1225     //emms_c();
1226     //s->next_picture_ptr->quality;
1227     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1228     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1229     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1230     if (!b_lambda) // FIXME we should do this somewhere else
1231         b_lambda = p_lambda;
1232     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1233                FF_LAMBDA_SHIFT;
1234
1235     c->width        = s->width  >> scale;
1236     c->height       = s->height >> scale;
1237     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1238     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1239     c->mb_decision  = s->avctx->mb_decision;
1240     c->me_cmp       = s->avctx->me_cmp;
1241     c->mb_cmp       = s->avctx->mb_cmp;
1242     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1243     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1244     c->time_base    = s->avctx->time_base;
1245     c->max_b_frames = s->max_b_frames;
1246
1247     if (avcodec_open2(c, codec, NULL) < 0)
1248         return -1;
1249
1250     for (i = 0; i < s->max_b_frames + 2; i++) {
1251         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1252                                                 s->next_picture_ptr;
1253
1254         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1255             pre_input = *pre_input_ptr;
1256
1257             if (!pre_input.shared && i) {
1258                 pre_input.f->data[0] += INPLACE_OFFSET;
1259                 pre_input.f->data[1] += INPLACE_OFFSET;
1260                 pre_input.f->data[2] += INPLACE_OFFSET;
1261             }
1262
1263             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1264                                  pre_input.f->data[0], pre_input.f->linesize[0],
1265                                  c->width,      c->height);
1266             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1267                                  pre_input.f->data[1], pre_input.f->linesize[1],
1268                                  c->width >> 1, c->height >> 1);
1269             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1270                                  pre_input.f->data[2], pre_input.f->linesize[2],
1271                                  c->width >> 1, c->height >> 1);
1272         }
1273     }
1274
1275     for (j = 0; j < s->max_b_frames + 1; j++) {
1276         int64_t rd = 0;
1277
1278         if (!s->input_picture[j])
1279             break;
1280
1281         c->error[0] = c->error[1] = c->error[2] = 0;
1282
1283         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1284         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1285
1286         out_size = encode_frame(c, s->tmp_frames[0]);
1287
1288         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1289
1290         for (i = 0; i < s->max_b_frames + 1; i++) {
1291             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1292
1293             s->tmp_frames[i + 1]->pict_type = is_p ?
1294                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1295             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1296
1297             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1298
1299             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1300         }
1301
1302         /* get the delayed frames */
1303         while (out_size) {
1304             out_size = encode_frame(c, NULL);
1305             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1306         }
1307
1308         rd += c->error[0] + c->error[1] + c->error[2];
1309
1310         if (rd < best_rd) {
1311             best_rd = rd;
1312             best_b_count = j;
1313         }
1314     }
1315
1316     avcodec_close(c);
1317     av_freep(&c);
1318
1319     return best_b_count;
1320 }
1321
1322 static int select_input_picture(MpegEncContext *s)
1323 {
1324     int i, ret;
1325
1326     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1327         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1328     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1329
1330     /* set next picture type & ordering */
1331     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1332         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1333             if (s->picture_in_gop_number < s->gop_size &&
1334                 s->next_picture_ptr &&
1335                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1336                 // FIXME check that te gop check above is +-1 correct
1337                 av_frame_unref(s->input_picture[0]->f);
1338
1339                 ff_vbv_update(s, 0);
1340
1341                 goto no_output_pic;
1342             }
1343         }
1344
1345         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1346             s->next_picture_ptr == NULL || s->intra_only) {
1347             s->reordered_input_picture[0] = s->input_picture[0];
1348             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1349             s->reordered_input_picture[0]->f->coded_picture_number =
1350                 s->coded_picture_number++;
1351         } else {
1352             int b_frames;
1353
1354             if (s->flags & CODEC_FLAG_PASS2) {
1355                 for (i = 0; i < s->max_b_frames + 1; i++) {
1356                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1357
1358                     if (pict_num >= s->rc_context.num_entries)
1359                         break;
1360                     if (!s->input_picture[i]) {
1361                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1362                         break;
1363                     }
1364
1365                     s->input_picture[i]->f->pict_type =
1366                         s->rc_context.entry[pict_num].new_pict_type;
1367                 }
1368             }
1369
1370             if (s->avctx->b_frame_strategy == 0) {
1371                 b_frames = s->max_b_frames;
1372                 while (b_frames && !s->input_picture[b_frames])
1373                     b_frames--;
1374             } else if (s->avctx->b_frame_strategy == 1) {
1375                 for (i = 1; i < s->max_b_frames + 1; i++) {
1376                     if (s->input_picture[i] &&
1377                         s->input_picture[i]->b_frame_score == 0) {
1378                         s->input_picture[i]->b_frame_score =
1379                             get_intra_count(s,
1380                                             s->input_picture[i    ]->f->data[0],
1381                                             s->input_picture[i - 1]->f->data[0],
1382                                             s->linesize) + 1;
1383                     }
1384                 }
1385                 for (i = 0; i < s->max_b_frames + 1; i++) {
1386                     if (s->input_picture[i] == NULL ||
1387                         s->input_picture[i]->b_frame_score - 1 >
1388                             s->mb_num / s->avctx->b_sensitivity)
1389                         break;
1390                 }
1391
1392                 b_frames = FFMAX(0, i - 1);
1393
1394                 /* reset scores */
1395                 for (i = 0; i < b_frames + 1; i++) {
1396                     s->input_picture[i]->b_frame_score = 0;
1397                 }
1398             } else if (s->avctx->b_frame_strategy == 2) {
1399                 b_frames = estimate_best_b_count(s);
1400             } else {
1401                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1402                 b_frames = 0;
1403             }
1404
1405             emms_c();
1406
1407             for (i = b_frames - 1; i >= 0; i--) {
1408                 int type = s->input_picture[i]->f->pict_type;
1409                 if (type && type != AV_PICTURE_TYPE_B)
1410                     b_frames = i;
1411             }
1412             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1413                 b_frames == s->max_b_frames) {
1414                 av_log(s->avctx, AV_LOG_ERROR,
1415                        "warning, too many b frames in a row\n");
1416             }
1417
1418             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1419                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1420                     s->gop_size > s->picture_in_gop_number) {
1421                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1422                 } else {
1423                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1424                         b_frames = 0;
1425                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1426                 }
1427             }
1428
1429             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1430                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1431                 b_frames--;
1432
1433             s->reordered_input_picture[0] = s->input_picture[b_frames];
1434             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1435                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1436             s->reordered_input_picture[0]->f->coded_picture_number =
1437                 s->coded_picture_number++;
1438             for (i = 0; i < b_frames; i++) {
1439                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1440                 s->reordered_input_picture[i + 1]->f->pict_type =
1441                     AV_PICTURE_TYPE_B;
1442                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1443                     s->coded_picture_number++;
1444             }
1445         }
1446     }
1447 no_output_pic:
1448     if (s->reordered_input_picture[0]) {
1449         s->reordered_input_picture[0]->reference =
1450            s->reordered_input_picture[0]->f->pict_type !=
1451                AV_PICTURE_TYPE_B ? 3 : 0;
1452
1453         ff_mpeg_unref_picture(s, &s->new_picture);
1454         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1455             return ret;
1456
1457         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1458             // input is a shared pix, so we can't modifiy it -> alloc a new
1459             // one & ensure that the shared one is reuseable
1460
1461             Picture *pic;
1462             int i = ff_find_unused_picture(s, 0);
1463             if (i < 0)
1464                 return i;
1465             pic = &s->picture[i];
1466
1467             pic->reference = s->reordered_input_picture[0]->reference;
1468             if (ff_alloc_picture(s, pic, 0) < 0) {
1469                 return -1;
1470             }
1471
1472             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1473             if (ret < 0)
1474                 return ret;
1475
1476             /* mark us unused / free shared pic */
1477             av_frame_unref(s->reordered_input_picture[0]->f);
1478             s->reordered_input_picture[0]->shared = 0;
1479
1480             s->current_picture_ptr = pic;
1481         } else {
1482             // input is not a shared pix -> reuse buffer for current_pix
1483             s->current_picture_ptr = s->reordered_input_picture[0];
1484             for (i = 0; i < 4; i++) {
1485                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1486             }
1487         }
1488         ff_mpeg_unref_picture(s, &s->current_picture);
1489         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1490                                        s->current_picture_ptr)) < 0)
1491             return ret;
1492
1493         s->picture_number = s->new_picture.f->display_picture_number;
1494     } else {
1495         ff_mpeg_unref_picture(s, &s->new_picture);
1496     }
1497     return 0;
1498 }
1499
1500 static void frame_end(MpegEncContext *s)
1501 {
1502     if (s->unrestricted_mv &&
1503         s->current_picture.reference &&
1504         !s->intra_only) {
1505         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1506         int hshift = desc->log2_chroma_w;
1507         int vshift = desc->log2_chroma_h;
1508         s->dsp.draw_edges(s->current_picture.f->data[0], s->current_picture.f->linesize[0],
1509                           s->h_edge_pos, s->v_edge_pos,
1510                           EDGE_WIDTH, EDGE_WIDTH,
1511                           EDGE_TOP | EDGE_BOTTOM);
1512         s->dsp.draw_edges(s->current_picture.f->data[1], s->current_picture.f->linesize[1],
1513                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1514                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1515                           EDGE_TOP | EDGE_BOTTOM);
1516         s->dsp.draw_edges(s->current_picture.f->data[2], s->current_picture.f->linesize[2],
1517                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1518                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1519                           EDGE_TOP | EDGE_BOTTOM);
1520     }
1521
1522     emms_c();
1523
1524     s->last_pict_type                 = s->pict_type;
1525     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1526     if (s->pict_type!= AV_PICTURE_TYPE_B)
1527         s->last_non_b_pict_type = s->pict_type;
1528
1529     s->avctx->coded_frame = s->current_picture_ptr->f;
1530
1531 }
1532
1533 static void update_noise_reduction(MpegEncContext *s)
1534 {
1535     int intra, i;
1536
1537     for (intra = 0; intra < 2; intra++) {
1538         if (s->dct_count[intra] > (1 << 16)) {
1539             for (i = 0; i < 64; i++) {
1540                 s->dct_error_sum[intra][i] >>= 1;
1541             }
1542             s->dct_count[intra] >>= 1;
1543         }
1544
1545         for (i = 0; i < 64; i++) {
1546             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1547                                        s->dct_count[intra] +
1548                                        s->dct_error_sum[intra][i] / 2) /
1549                                       (s->dct_error_sum[intra][i] + 1);
1550         }
1551     }
1552 }
1553
/**
 * Set up the encoder context for coding the picture that
 * s->current_picture_ptr points to.
 *
 * Releases the previous "last" picture when it is no longer needed, takes
 * fresh references for the current/last/next working pictures, adjusts the
 * working pictures for field coding, selects the dequantizer function
 * pair and refreshes the noise-reduction offsets.
 *
 * @return 0 on success, otherwise the negative value returned by
 *         ff_mpeg_ref_picture()
 */
1554 static int frame_start(MpegEncContext *s)
1555 {
1556     int ret;
1557
1558     /* mark & release old frames */
1559     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1560         s->last_picture_ptr != s->next_picture_ptr &&
1561         s->last_picture_ptr->f->buf[0]) {
1562         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1563     }
1564
1565     s->current_picture_ptr->f->pict_type = s->pict_type;
1566     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1567
         /* drop the previous working reference before taking the new one */
1568     ff_mpeg_unref_picture(s, &s->current_picture);
1569     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1570                                    s->current_picture_ptr)) < 0)
1571         return ret;
1572
         /* for non-B pictures the old "next" becomes "last"; the current
          * picture becomes the new "next" unless it is droppable */
1573     if (s->pict_type != AV_PICTURE_TYPE_B) {
1574         s->last_picture_ptr = s->next_picture_ptr;
1575         if (!s->droppable)
1576             s->next_picture_ptr = s->current_picture_ptr;
1577     }
1578
         /* re-reference last/next only if they actually own a buffer */
1579     if (s->last_picture_ptr) {
1580         ff_mpeg_unref_picture(s, &s->last_picture);
1581         if (s->last_picture_ptr->f->buf[0] &&
1582             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1583                                        s->last_picture_ptr)) < 0)
1584             return ret;
1585     }
1586     if (s->next_picture_ptr) {
1587         ff_mpeg_unref_picture(s, &s->next_picture);
1588         if (s->next_picture_ptr->f->buf[0] &&
1589             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1590                                        s->next_picture_ptr)) < 0)
1591             return ret;
1592     }
1593
         /* field pictures: a bottom field starts one line further down, and
          * all three working pictures get their linesize doubled so that
          * stepping one row skips a line */
1594     if (s->picture_structure!= PICT_FRAME) {
1595         int i;
1596         for (i = 0; i < 4; i++) {
1597             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1598                 s->current_picture.f->data[i] +=
1599                     s->current_picture.f->linesize[i];
1600             }
1601             s->current_picture.f->linesize[i] *= 2;
1602             s->last_picture.f->linesize[i]    *= 2;
1603             s->next_picture.f->linesize[i]    *= 2;
1604         }
1605     }
1606
         /* choose the intra/inter dequantizer pair: mpeg2 variant for
          * mpeg_quant or MPEG-2, h263 variant for H.263/H.261 output
          * formats, mpeg1 variant otherwise */
1607     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1608         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1609         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1610     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1611         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1612         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1613     } else {
1614         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1615         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1616     }
1617
         /* the error-sum table is expected to exist only when noise
          * reduction is enabled on an encoding context (asserted below) */
1618     if (s->dct_error_sum) {
1619         av_assert2(s->avctx->noise_reduction && s->encoding);
1620         update_noise_reduction(s);
1621     }
1622
1623     return 0;
1624 }
1625
1626 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1627                           const AVFrame *pic_arg, int *got_packet)
1628 {
1629     MpegEncContext *s = avctx->priv_data;
1630     int i, stuffing_count, ret;
1631     int context_count = s->slice_context_count;
1632
1633     s->picture_in_gop_number++;
1634
1635     if (load_input_picture(s, pic_arg) < 0)
1636         return -1;
1637
1638     if (select_input_picture(s) < 0) {
1639         return -1;
1640     }
1641
1642     /* output? */
1643     if (s->new_picture.f->data[0]) {
1644         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1645             return ret;
1646         if (s->mb_info) {
1647             s->mb_info_ptr = av_packet_new_side_data(pkt,
1648                                  AV_PKT_DATA_H263_MB_INFO,
1649                                  s->mb_width*s->mb_height*12);
1650             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1651         }
1652
1653         for (i = 0; i < context_count; i++) {
1654             int start_y = s->thread_context[i]->start_mb_y;
1655             int   end_y = s->thread_context[i]->  end_mb_y;
1656             int h       = s->mb_height;
1657             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1658             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1659
1660             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1661         }
1662
1663         s->pict_type = s->new_picture.f->pict_type;
1664         //emms_c();
1665         ret = frame_start(s);
1666         if (ret < 0)
1667             return ret;
1668 vbv_retry:
1669         if (encode_picture(s, s->picture_number) < 0)
1670             return -1;
1671
1672         avctx->header_bits = s->header_bits;
1673         avctx->mv_bits     = s->mv_bits;
1674         avctx->misc_bits   = s->misc_bits;
1675         avctx->i_tex_bits  = s->i_tex_bits;
1676         avctx->p_tex_bits  = s->p_tex_bits;
1677         avctx->i_count     = s->i_count;
1678         // FIXME f/b_count in avctx
1679         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1680         avctx->skip_count  = s->skip_count;
1681
1682         frame_end(s);
1683
1684         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1685             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1686
1687         if (avctx->rc_buffer_size) {
1688             RateControlContext *rcc = &s->rc_context;
1689             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1690
1691             if (put_bits_count(&s->pb) > max_size &&
1692                 s->lambda < s->avctx->lmax) {
1693                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1694                                        (s->qscale + 1) / s->qscale);
1695                 if (s->adaptive_quant) {
1696                     int i;
1697                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1698                         s->lambda_table[i] =
1699                             FFMAX(s->lambda_table[i] + 1,
1700                                   s->lambda_table[i] * (s->qscale + 1) /
1701                                   s->qscale);
1702                 }
1703                 s->mb_skipped = 0;        // done in frame_start()
1704                 // done in encode_picture() so we must undo it
1705                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1706                     if (s->flipflop_rounding          ||
1707                         s->codec_id == AV_CODEC_ID_H263P ||
1708                         s->codec_id == AV_CODEC_ID_MPEG4)
1709                         s->no_rounding ^= 1;
1710                 }
1711                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1712                     s->time_base       = s->last_time_base;
1713                     s->last_non_b_time = s->time - s->pp_time;
1714                 }
1715                 for (i = 0; i < context_count; i++) {
1716                     PutBitContext *pb = &s->thread_context[i]->pb;
1717                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1718                 }
1719                 goto vbv_retry;
1720             }
1721
1722             av_assert0(s->avctx->rc_max_rate);
1723         }
1724
1725         if (s->flags & CODEC_FLAG_PASS1)
1726             ff_write_pass1_stats(s);
1727
1728         for (i = 0; i < 4; i++) {
1729             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1730             avctx->error[i] += s->current_picture_ptr->f->error[i];
1731         }
1732
1733         if (s->flags & CODEC_FLAG_PASS1)
1734             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1735                    avctx->i_tex_bits + avctx->p_tex_bits ==
1736                        put_bits_count(&s->pb));
1737         flush_put_bits(&s->pb);
1738         s->frame_bits  = put_bits_count(&s->pb);
1739
1740         stuffing_count = ff_vbv_update(s, s->frame_bits);
1741         s->stuffing_bits = 8*stuffing_count;
1742         if (stuffing_count) {
1743             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1744                     stuffing_count + 50) {
1745                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1746                 return -1;
1747             }
1748
1749             switch (s->codec_id) {
1750             case AV_CODEC_ID_MPEG1VIDEO:
1751             case AV_CODEC_ID_MPEG2VIDEO:
1752                 while (stuffing_count--) {
1753                     put_bits(&s->pb, 8, 0);
1754                 }
1755             break;
1756             case AV_CODEC_ID_MPEG4:
1757                 put_bits(&s->pb, 16, 0);
1758                 put_bits(&s->pb, 16, 0x1C3);
1759                 stuffing_count -= 4;
1760                 while (stuffing_count--) {
1761                     put_bits(&s->pb, 8, 0xFF);
1762                 }
1763             break;
1764             default:
1765                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1766             }
1767             flush_put_bits(&s->pb);
1768             s->frame_bits  = put_bits_count(&s->pb);
1769         }
1770
1771         /* update mpeg1/2 vbv_delay for CBR */
1772         if (s->avctx->rc_max_rate                          &&
1773             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1774             s->out_format == FMT_MPEG1                     &&
1775             90000LL * (avctx->rc_buffer_size - 1) <=
1776                 s->avctx->rc_max_rate * 0xFFFFLL) {
1777             int vbv_delay, min_delay;
1778             double inbits  = s->avctx->rc_max_rate *
1779                              av_q2d(s->avctx->time_base);
1780             int    minbits = s->frame_bits - 8 *
1781                              (s->vbv_delay_ptr - s->pb.buf - 1);
1782             double bits    = s->rc_context.buffer_index + minbits - inbits;
1783
1784             if (bits < 0)
1785                 av_log(s->avctx, AV_LOG_ERROR,
1786                        "Internal error, negative bits\n");
1787
1788             assert(s->repeat_first_field == 0);
1789
1790             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1791             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1792                         s->avctx->rc_max_rate;
1793
1794             vbv_delay = FFMAX(vbv_delay, min_delay);
1795
1796             av_assert0(vbv_delay < 0xFFFF);
1797
1798             s->vbv_delay_ptr[0] &= 0xF8;
1799             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1800             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1801             s->vbv_delay_ptr[2] &= 0x07;
1802             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1803             avctx->vbv_delay     = vbv_delay * 300;
1804         }
1805         s->total_bits     += s->frame_bits;
1806         avctx->frame_bits  = s->frame_bits;
1807
1808         pkt->pts = s->current_picture.f->pts;
1809         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1810             if (!s->current_picture.f->coded_picture_number)
1811                 pkt->dts = pkt->pts - s->dts_delta;
1812             else
1813                 pkt->dts = s->reordered_pts;
1814             s->reordered_pts = pkt->pts;
1815         } else
1816             pkt->dts = pkt->pts;
1817         if (s->current_picture.f->key_frame)
1818             pkt->flags |= AV_PKT_FLAG_KEY;
1819         if (s->mb_info)
1820             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1821     } else {
1822         s->frame_bits = 0;
1823     }
1824
1825     /* release non-reference frames */
1826     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1827         if (!s->picture[i].reference)
1828             ff_mpeg_unref_picture(s, &s->picture[i]);
1829     }
1830
1831     av_assert1((s->frame_bits & 7) == 0);
1832
1833     pkt->size = s->frame_bits / 8;
1834     *got_packet = !!pkt->size;
1835     return 0;
1836 }
1837
1838 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1839                                                 int n, int threshold)
1840 {
1841     static const char tab[64] = {
1842         3, 2, 2, 1, 1, 1, 1, 1,
1843         1, 1, 1, 1, 1, 1, 1, 1,
1844         1, 1, 1, 1, 1, 1, 1, 1,
1845         0, 0, 0, 0, 0, 0, 0, 0,
1846         0, 0, 0, 0, 0, 0, 0, 0,
1847         0, 0, 0, 0, 0, 0, 0, 0,
1848         0, 0, 0, 0, 0, 0, 0, 0,
1849         0, 0, 0, 0, 0, 0, 0, 0
1850     };
1851     int score = 0;
1852     int run = 0;
1853     int i;
1854     int16_t *block = s->block[n];
1855     const int last_index = s->block_last_index[n];
1856     int skip_dc;
1857
1858     if (threshold < 0) {
1859         skip_dc = 0;
1860         threshold = -threshold;
1861     } else
1862         skip_dc = 1;
1863
1864     /* Are all we could set to zero already zero? */
1865     if (last_index <= skip_dc - 1)
1866         return;
1867
1868     for (i = 0; i <= last_index; i++) {
1869         const int j = s->intra_scantable.permutated[i];
1870         const int level = FFABS(block[j]);
1871         if (level == 1) {
1872             if (skip_dc && i == 0)
1873                 continue;
1874             score += tab[run];
1875             run = 0;
1876         } else if (level > 1) {
1877             return;
1878         } else {
1879             run++;
1880         }
1881     }
1882     if (score >= threshold)
1883         return;
1884     for (i = skip_dc; i <= last_index; i++) {
1885         const int j = s->intra_scantable.permutated[i];
1886         block[j] = 0;
1887     }
1888     if (block[0])
1889         s->block_last_index[n] = 0;
1890     else
1891         s->block_last_index[n] = -1;
1892 }
1893
1894 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1895                                int last_index)
1896 {
1897     int i;
1898     const int maxlevel = s->max_qcoeff;
1899     const int minlevel = s->min_qcoeff;
1900     int overflow = 0;
1901
1902     if (s->mb_intra) {
1903         i = 1; // skip clipping of intra dc
1904     } else
1905         i = 0;
1906
1907     for (; i <= last_index; i++) {
1908         const int j = s->intra_scantable.permutated[i];
1909         int level = block[j];
1910
1911         if (level > maxlevel) {
1912             level = maxlevel;
1913             overflow++;
1914         } else if (level < minlevel) {
1915             level = minlevel;
1916             overflow++;
1917         }
1918
1919         block[j] = level;
1920     }
1921
1922     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1923         av_log(s->avctx, AV_LOG_INFO,
1924                "warning, clipping %d dct coefficients to %d..%d\n",
1925                overflow, minlevel, maxlevel);
1926 }
1927
/**
 * Compute a weight for each sample of an 8x8 block from its local
 * variation.
 *
 * For every position the neighbourhood of up to 3x3 samples (clipped to
 * the 8x8 block) is examined; the weight is
 *   36 * ff_sqrt(count * sum_of_squares - sum * sum) / count
 * with count being the number of samples in the neighbourhood.
 *
 * @param weight output, 64 entries stored as weight[x + 8 * y]
 * @param ptr    top-left sample of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int row_begin = FFMAX(row - 1, 0);
        const int row_end   = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int col_begin = FFMAX(col - 1, 0);
            const int col_end   = FFMIN(8, col + 2);
            int total    = 0;
            int total_sq = 0;
            int samples  = 0;
            int r, c;

            for (r = row_begin; r < row_end; r++) {
                for (c = col_begin; c < col_end; c++) {
                    const int pix = ptr[c + r * stride];

                    total    += pix;
                    total_sq += pix * pix;
                    samples++;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * total_sq - total * total)) / samples;
        }
    }
}
1951
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * Steps, in order: optional per-macroblock quantizer update, edge
 * emulation for macroblocks crossing the picture border, loading of the
 * source samples (intra) or of the difference to the motion-compensated
 * prediction (inter) including the frame/field DCT decision, quantization
 * with optional refinement and coefficient elimination, and finally the
 * codec-specific macroblock writer.
 *
 * @param s               encoder context
 * @param motion_x        forwarded to the codec-specific macroblock writer
 * @param motion_y        forwarded to the codec-specific macroblock writer
 * @param mb_block_height chroma rows per macroblock, used to address chroma
 * @param mb_block_width  chroma columns per macroblock, used to address
 *                        chroma
 * @param mb_block_count  number of 8x8 blocks handled per macroblock
 *                        (encode_mb() passes 6, 8 or 12)
 */
1952 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1953                                                 int motion_x, int motion_y,
1954                                                 int mb_block_height,
1955                                                 int mb_block_width,
1956                                                 int mb_block_count)
1957 {
1958     int16_t weight[12][64];
1959     int16_t orig[12][64];
1960     const int mb_x = s->mb_x;
1961     const int mb_y = s->mb_y;
1962     int i;
1963     int skip_dct[12];
1964     int dct_offset = s->linesize * 8; // default for progressive frames
1965     int uv_dct_offset = s->uvlinesize * 8;
1966     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1967     ptrdiff_t wrap_y, wrap_c;
1968
         /* every block starts out with the context-wide skipdct setting */
1969     for (i = 0; i < mb_block_count; i++)
1970         skip_dct[i] = s->skipdct;
1971
         /* adaptive quantization: take this macroblock's lambda and derive
          * the quantizer change relative to the previous qscale */
1972     if (s->adaptive_quant) {
1973         const int last_qp = s->qscale;
1974         const int mb_xy = mb_x + mb_y * s->mb_stride;
1975
1976         s->lambda = s->lambda_table[mb_xy];
1977         update_qscale(s);
1978
1979         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1980             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1981             s->dquant = s->qscale - last_qp;
1982
1983             if (s->out_format == FMT_H263) {
1984                 s->dquant = av_clip(s->dquant, -2, 2);
1985
1986                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1987                     if (!s->mb_intra) {
1988                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1989                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1990                                 s->dquant = 0;
1991                         }
1992                         if (s->mv_type == MV_TYPE_8X8)
1993                             s->dquant = 0;
1994                     }
1995                 }
1996             }
1997         }
1998         ff_set_qscale(s, last_qp + s->dquant);
1999     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2000         ff_set_qscale(s, s->qscale + s->dquant);
2001
         /* locate the macroblock inside the three planes of the source
          * picture */
2002     wrap_y = s->linesize;
2003     wrap_c = s->uvlinesize;
2004     ptr_y  = s->new_picture.f->data[0] +
2005              (mb_y * 16 * wrap_y)              + mb_x * 16;
2006     ptr_cb = s->new_picture.f->data[1] +
2007              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2008     ptr_cr = s->new_picture.f->data[2] +
2009              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2010
         /* the macroblock reaches beyond the picture width or height (and
          * the codec is not AMV): build the source block in
          * edge_emu_buffer via emulated_edge_mc and read from there */
2011     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2012         uint8_t *ebuf = s->edge_emu_buffer + 32;
2013         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2014         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2015         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2016                                  wrap_y, wrap_y,
2017                                  16, 16, mb_x * 16, mb_y * 16,
2018                                  s->width, s->height);
2019         ptr_y = ebuf;
2020         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
2021                                  wrap_c, wrap_c,
2022                                  mb_block_width, mb_block_height,
2023                                  mb_x * mb_block_width, mb_y * mb_block_height,
2024                                  cw, ch);
2025         ptr_cb = ebuf + 18 * wrap_y;
2026         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 16, ptr_cr,
2027                                  wrap_c, wrap_c,
2028                                  mb_block_width, mb_block_height,
2029                                  mb_x * mb_block_width, mb_y * mb_block_height,
2030                                  cw, ch);
2031         ptr_cr = ebuf + 18 * wrap_y + 16;
2032     }
2033
2034     if (s->mb_intra) {
             /* frame/field DCT decision: interlaced DCT is selected when
              * the progressive score (biased by -400) is positive and
              * larger than the interlaced score */
2035         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2036             int progressive_score, interlaced_score;
2037
2038             s->interlaced_dct = 0;
2039             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
2040                                                     NULL, wrap_y, 8) +
2041                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2042                                                     NULL, wrap_y, 8) - 400;
2043
2044             if (progressive_score > 0) {
2045                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
2046                                                        NULL, wrap_y * 2, 8) +
2047                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
2048                                                        NULL, wrap_y * 2, 8);
2049                 if (progressive_score > interlaced_score) {
2050                     s->interlaced_dct = 1;
2051
                         /* lower blocks now start one line down and rows
                          * are stepped with doubled stride */
2052                     dct_offset = wrap_y;
2053                     uv_dct_offset = wrap_c;
2054                     wrap_y <<= 1;
2055                     if (s->chroma_format == CHROMA_422 ||
2056                         s->chroma_format == CHROMA_444)
2057                         wrap_c <<= 1;
2058                 }
2059             }
2060         }
2061
             /* load the four luma blocks from the source picture */
2062         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
2063         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
2064         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
2065         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
2066
2067         if (s->flags & CODEC_FLAG_GRAY) {
2068             skip_dct[4] = 1;
2069             skip_dct[5] = 1;
2070         } else {
2071             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2072             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2073             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2074                 s->dsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2075                 s->dsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2076             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2077                 s->dsp.get_pixels(s->block[6], ptr_cb + 8, wrap_c);
2078                 s->dsp.get_pixels(s->block[7], ptr_cr + 8, wrap_c);
2079                 s->dsp.get_pixels(s->block[8], ptr_cb + uv_dct_offset, wrap_c);
2080                 s->dsp.get_pixels(s->block[9], ptr_cr + uv_dct_offset, wrap_c);
2081                 s->dsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2082                 s->dsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2083             }
2084         }
2085     } else {
2086         op_pixels_func (*op_pix)[4];
2087         qpel_mc_func (*op_qpix)[16];
2088         uint8_t *dest_y, *dest_cb, *dest_cr;
2089
2090         dest_y  = s->dest[0];
2091         dest_cb = s->dest[1];
2092         dest_cr = s->dest[2];
2093
2094         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2095             op_pix  = s->hdsp.put_pixels_tab;
2096             op_qpix = s->qdsp.put_qpel_pixels_tab;
2097         } else {
2098             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2099             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2100         }
2101
             /* build the prediction in s->dest; after a forward prediction
              * the "avg" tables are used for the backward one */
2102         if (s->mv_dir & MV_DIR_FORWARD) {
2103             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
2104                           s->last_picture.f->data,
2105                           op_pix, op_qpix);
2106             op_pix  = s->hdsp.avg_pixels_tab;
2107             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2108         }
2109         if (s->mv_dir & MV_DIR_BACKWARD) {
2110             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
2111                           s->next_picture.f->data,
2112                           op_pix, op_qpix);
2113         }
2114
             /* frame/field DCT decision on the prediction error; same
              * rule as in the intra case, with an extra -400 bias when
              * ildct_cmp is FF_CMP_VSSE */
2115         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2116             int progressive_score, interlaced_score;
2117
2118             s->interlaced_dct = 0;
2119             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
2120                                                     ptr_y,              wrap_y,
2121                                                     8) +
2122                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
2123                                                     ptr_y + wrap_y * 8, wrap_y,
2124                                                     8) - 400;
2125
2126             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2127                 progressive_score -= 400;
2128
2129             if (progressive_score > 0) {
2130                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
2131                                                        ptr_y,
2132                                                        wrap_y * 2, 8) +
2133                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
2134                                                        ptr_y + wrap_y,
2135                                                        wrap_y * 2, 8);
2136
2137                 if (progressive_score > interlaced_score) {
2138                     s->interlaced_dct = 1;
2139
2140                     dct_offset = wrap_y;
2141                     uv_dct_offset = wrap_c;
2142                     wrap_y <<= 1;
2143                     if (s->chroma_format == CHROMA_422)
2144                         wrap_c <<= 1;
2145                 }
2146             }
2147         }
2148
             /* blocks receive source minus prediction */
2149         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2150         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2151         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2152                            dest_y + dct_offset, wrap_y);
2153         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2154                            dest_y + dct_offset + 8, wrap_y);
2155
2156         if (s->flags & CODEC_FLAG_GRAY) {
2157             skip_dct[4] = 1;
2158             skip_dct[5] = 1;
2159         } else {
2160             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2161             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2162             if (!s->chroma_y_shift) { /* 422 */
2163                 s->dsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2164                                    dest_cb + uv_dct_offset, wrap_c);
2165                 s->dsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2166                                    dest_cr + uv_dct_offset, wrap_c);
2167             }
2168         }
2169         /* pre quantization */
             /* when the stored mc_mb_var of this macroblock is below
              * 2 * qscale^2, skip the DCT of every block whose SAD against
              * the prediction is below 20 * qscale */
2170         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2171                 2 * s->qscale * s->qscale) {
2172             // FIXME optimize
2173             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2174                               wrap_y, 8) < 20 * s->qscale)
2175                 skip_dct[0] = 1;
2176             if (s->dsp.sad[1](NULL, ptr_y + 8,
2177                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2178                 skip_dct[1] = 1;
2179             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2180                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2181                 skip_dct[2] = 1;
2182             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2183                               dest_y + dct_offset + 8,
2184                               wrap_y, 8) < 20 * s->qscale)
2185                 skip_dct[3] = 1;
2186             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2187                               wrap_c, 8) < 20 * s->qscale)
2188                 skip_dct[4] = 1;
2189             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2190                               wrap_c, 8) < 20 * s->qscale)
2191                 skip_dct[5] = 1;
2192             if (!s->chroma_y_shift) { /* 422 */
2193                 if (s->dsp.sad[1](NULL, ptr_cb + uv_dct_offset,
2194                                   dest_cb + uv_dct_offset,
2195                                   wrap_c, 8) < 20 * s->qscale)
2196                     skip_dct[6] = 1;
2197                 if (s->dsp.sad[1](NULL, ptr_cr + uv_dct_offset,
2198                                   dest_cr + uv_dct_offset,
2199                                   wrap_c, 8) < 20 * s->qscale)
2200                     skip_dct[7] = 1;
2201             }
2202         }
2203     }
2204
         /* noise shaping: compute per-sample weights from the source for
          * every block that is not skipped and keep a copy of the
          * unquantized blocks for dct_quantize_refine() */
2205     if (s->quantizer_noise_shaping) {
2206         if (!skip_dct[0])
2207             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2208         if (!skip_dct[1])
2209             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2210         if (!skip_dct[2])
2211             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2212         if (!skip_dct[3])
2213             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2214         if (!skip_dct[4])
2215             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2216         if (!skip_dct[5])
2217             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2218         if (!s->chroma_y_shift) { /* 422 */
2219             if (!skip_dct[6])
2220                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2221                                   wrap_c);
2222             if (!skip_dct[7])
2223                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2224                                   wrap_c);
2225         }
2226         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2227     }
2228
2229     /* DCT & quantize */
2230     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2231     {
2232         for (i = 0; i < mb_block_count; i++) {
2233             if (!skip_dct[i]) {
2234                 int overflow;
2235                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2236                 // FIXME we could decide to change to quantizer instead of
2237                 // clipping
2238                 // JS: I don't think that would be a good idea it could lower
2239                 //     quality instead of improve it. Just INTRADC clipping
2240                 //     deserves changes in quantizer
2241                 if (overflow)
2242                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2243             } else
2244                 s->block_last_index[i] = -1;
2245         }
2246         if (s->quantizer_noise_shaping) {
2247             for (i = 0; i < mb_block_count; i++) {
2248                 if (!skip_dct[i]) {
2249                     s->block_last_index[i] =
2250                         dct_quantize_refine(s, s->block[i], weight[i],
2251                                             orig[i], i, s->qscale);
2252                 }
2253             }
2254         }
2255
             /* single coefficient elimination is applied to inter
              * macroblocks only: blocks 0-3 use the luma threshold, the
              * remaining blocks the chroma threshold */
2256         if (s->luma_elim_threshold && !s->mb_intra)
2257             for (i = 0; i < 4; i++)
2258                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2259         if (s->chroma_elim_threshold && !s->mb_intra)
2260             for (i = 4; i < mb_block_count; i++)
2261                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2262
2263         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2264             for (i = 0; i < mb_block_count; i++) {
2265                 if (s->block_last_index[i] == -1)
2266                     s->coded_score[i] = INT_MAX / 256;
2267             }
2268         }
2269     }
2270
         /* gray intra macroblocks: the chroma blocks are replaced by
          * DC-only blocks holding 1024 / c_dc_scale (rounded) */
2271     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2272         s->block_last_index[4] =
2273         s->block_last_index[5] = 0;
2274         s->block[4][0] =
2275         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2276         if (!s->chroma_y_shift) { /* 422 / 444 */
2277             for (i=6; i<12; i++) {
2278                 s->block_last_index[i] = 0;
2279                 s->block[i][0] = s->block[4][0];
2280             }
2281         }
2282     }
2283
2284     // non c quantize code returns incorrect block_last_index FIXME
         /* recompute the last index by scanning backwards for the last
          * nonzero coefficient in scan order */
2285     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2286         for (i = 0; i < mb_block_count; i++) {
2287             int j;
2288             if (s->block_last_index[i] > 0) {
2289                 for (j = 63; j > 0; j--) {
2290                     if (s->block[i][s->intra_scantable.permutated[j]])
2291                         break;
2292                 }
2293                 s->block_last_index[i] = j;
2294             }
2295         }
2296     }
2297
2298     /* huffman encode */
2299     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2300     case AV_CODEC_ID_MPEG1VIDEO:
2301     case AV_CODEC_ID_MPEG2VIDEO:
2302         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2303             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2304         break;
2305     case AV_CODEC_ID_MPEG4:
2306         if (CONFIG_MPEG4_ENCODER)
2307             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2308         break;
2309     case AV_CODEC_ID_MSMPEG4V2:
2310     case AV_CODEC_ID_MSMPEG4V3:
2311     case AV_CODEC_ID_WMV1:
2312         if (CONFIG_MSMPEG4_ENCODER)
2313             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2314         break;
2315     case AV_CODEC_ID_WMV2:
2316         if (CONFIG_WMV2_ENCODER)
2317             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2318         break;
2319     case AV_CODEC_ID_H261:
2320         if (CONFIG_H261_ENCODER)
2321             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2322         break;
2323     case AV_CODEC_ID_H263:
2324     case AV_CODEC_ID_H263P:
2325     case AV_CODEC_ID_FLV1:
2326     case AV_CODEC_ID_RV10:
2327     case AV_CODEC_ID_RV20:
2328         if (CONFIG_H263_ENCODER)
2329             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2330         break;
2331     case AV_CODEC_ID_MJPEG:
2332     case AV_CODEC_ID_AMV:
2333         if (CONFIG_MJPEG_ENCODER)
2334             ff_mjpeg_encode_mb(s, s->block);
2335         break;
2336     default:
2337         av_assert1(0);
2338     }
2339 }
2340
2341 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2342 {
2343     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2344     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2345     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2346 }
2347
2348 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2349     int i;
2350
2351     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2352
2353     /* mpeg1 */
2354     d->mb_skip_run= s->mb_skip_run;
2355     for(i=0; i<3; i++)
2356         d->last_dc[i] = s->last_dc[i];
2357
2358     /* statistics */
2359     d->mv_bits= s->mv_bits;
2360     d->i_tex_bits= s->i_tex_bits;
2361     d->p_tex_bits= s->p_tex_bits;
2362     d->i_count= s->i_count;
2363     d->f_count= s->f_count;
2364     d->b_count= s->b_count;
2365     d->skip_count= s->skip_count;
2366     d->misc_bits= s->misc_bits;
2367     d->last_bits= 0;
2368
2369     d->mb_skipped= 0;
2370     d->qscale= s->qscale;
2371     d->dquant= s->dquant;
2372
2373     d->esc3_level_length= s->esc3_level_length;
2374 }
2375
2376 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2377     int i;
2378
2379     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2380     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2381
2382     /* mpeg1 */
2383     d->mb_skip_run= s->mb_skip_run;
2384     for(i=0; i<3; i++)
2385         d->last_dc[i] = s->last_dc[i];
2386
2387     /* statistics */
2388     d->mv_bits= s->mv_bits;
2389     d->i_tex_bits= s->i_tex_bits;
2390     d->p_tex_bits= s->p_tex_bits;
2391     d->i_count= s->i_count;
2392     d->f_count= s->f_count;
2393     d->b_count= s->b_count;
2394     d->skip_count= s->skip_count;
2395     d->misc_bits= s->misc_bits;
2396
2397     d->mb_intra= s->mb_intra;
2398     d->mb_skipped= s->mb_skipped;
2399     d->mv_type= s->mv_type;
2400     d->mv_dir= s->mv_dir;
2401     d->pb= s->pb;
2402     if(s->data_partitioning){
2403         d->pb2= s->pb2;
2404         d->tex_pb= s->tex_pb;
2405     }
2406     d->block= s->block;
2407     for(i=0; i<8; i++)
2408         d->block_last_index[i]= s->block_last_index[i];
2409     d->interlaced_dct= s->interlaced_dct;
2410     d->qscale= s->qscale;
2411
2412     d->esc3_level_length= s->esc3_level_length;
2413 }
2414
2415 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2416                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2417                            int *dmin, int *next_block, int motion_x, int motion_y)
2418 {
2419     int score;
2420     uint8_t *dest_backup[3];
2421
2422     copy_context_before_encode(s, backup, type);
2423
2424     s->block= s->blocks[*next_block];
2425     s->pb= pb[*next_block];
2426     if(s->data_partitioning){
2427         s->pb2   = pb2   [*next_block];
2428         s->tex_pb= tex_pb[*next_block];
2429     }
2430
2431     if(*next_block){
2432         memcpy(dest_backup, s->dest, sizeof(s->dest));
2433         s->dest[0] = s->rd_scratchpad;
2434         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2435         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2436         av_assert0(s->linesize >= 32); //FIXME
2437     }
2438
2439     encode_mb(s, motion_x, motion_y);
2440
2441     score= put_bits_count(&s->pb);
2442     if(s->data_partitioning){
2443         score+= put_bits_count(&s->pb2);
2444         score+= put_bits_count(&s->tex_pb);
2445     }
2446
2447     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2448         ff_MPV_decode_mb(s, s->block);
2449
2450         score *= s->lambda2;
2451         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2452     }
2453
2454     if(*next_block){
2455         memcpy(s->dest, dest_backup, sizeof(s->dest));
2456     }
2457
2458     if(score<*dmin){
2459         *dmin= score;
2460         *next_block^=1;
2461
2462         copy_context_after_encode(best, s, type);
2463     }
2464 }
2465
2466 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2467     uint32_t *sq = ff_square_tab + 256;
2468     int acc=0;
2469     int x,y;
2470
2471     if(w==16 && h==16)
2472         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2473     else if(w==8 && h==8)
2474         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2475
2476     for(y=0; y<h; y++){
2477         for(x=0; x<w; x++){
2478             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2479         }
2480     }
2481
2482     av_assert2(acc>=0);
2483
2484     return acc;
2485 }
2486
2487 static int sse_mb(MpegEncContext *s){
2488     int w= 16;
2489     int h= 16;
2490
2491     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2492     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2493
2494     if(w==16 && h==16)
2495       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2496         return  s->dsp.nsse[0](s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2497                +s->dsp.nsse[1](s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2498                +s->dsp.nsse[1](s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2499       }else{
2500         return  s->dsp.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2501                +s->dsp.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2502                +s->dsp.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2503       }
2504     else
2505         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2506                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2507                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2508 }
2509
2510 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2511     MpegEncContext *s= *(void**)arg;
2512
2513
2514     s->me.pre_pass=1;
2515     s->me.dia_size= s->avctx->pre_dia_size;
2516     s->first_slice_line=1;
2517     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2518         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2519             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2520         }
2521         s->first_slice_line=0;
2522     }
2523
2524     s->me.pre_pass=0;
2525
2526     return 0;
2527 }
2528
2529 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2530     MpegEncContext *s= *(void**)arg;
2531
2532     ff_check_alignment();
2533
2534     s->me.dia_size= s->avctx->dia_size;
2535     s->first_slice_line=1;
2536     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2537         s->mb_x=0; //for block init below
2538         ff_init_block_index(s);
2539         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2540             s->block_index[0]+=2;
2541             s->block_index[1]+=2;
2542             s->block_index[2]+=2;
2543             s->block_index[3]+=2;
2544
2545             /* compute motion vector & mb_type and store in context */
2546             if(s->pict_type==AV_PICTURE_TYPE_B)
2547                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2548             else
2549                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2550         }
2551         s->first_slice_line=0;
2552     }
2553     return 0;
2554 }
2555
2556 static int mb_var_thread(AVCodecContext *c, void *arg){
2557     MpegEncContext *s= *(void**)arg;
2558     int mb_x, mb_y;
2559
2560     ff_check_alignment();
2561
2562     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2563         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2564             int xx = mb_x * 16;
2565             int yy = mb_y * 16;
2566             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2567             int varc;
2568             int sum = s->dsp.pix_sum(pix, s->linesize);
2569
2570             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2571
2572             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2573             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2574             s->me.mb_var_sum_temp    += varc;
2575         }
2576     }
2577     return 0;
2578 }
2579
2580 static void write_slice_end(MpegEncContext *s){
2581     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2582         if(s->partitioned_frame){
2583             ff_mpeg4_merge_partitions(s);
2584         }
2585
2586         ff_mpeg4_stuffing(&s->pb);
2587     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2588         ff_mjpeg_encode_stuffing(s);
2589     }
2590
2591     avpriv_align_put_bits(&s->pb);
2592     flush_put_bits(&s->pb);
2593
2594     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2595         s->misc_bits+= get_bits_diff(s);
2596 }
2597
2598 static void write_mb_info(MpegEncContext *s)
2599 {
2600     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2601     int offset = put_bits_count(&s->pb);
2602     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2603     int gobn = s->mb_y / s->gob_index;
2604     int pred_x, pred_y;
2605     if (CONFIG_H263_ENCODER)
2606         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2607     bytestream_put_le32(&ptr, offset);
2608     bytestream_put_byte(&ptr, s->qscale);
2609     bytestream_put_byte(&ptr, gobn);
2610     bytestream_put_le16(&ptr, mba);
2611     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2612     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2613     /* 4MV not implemented */
2614     bytestream_put_byte(&ptr, 0); /* hmv2 */
2615     bytestream_put_byte(&ptr, 0); /* vmv2 */
2616 }
2617
2618 static void update_mb_info(MpegEncContext *s, int startcode)
2619 {
2620     if (!s->mb_info)
2621         return;
2622     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2623         s->mb_info_size += 12;
2624         s->prev_mb_info = s->last_mb_info;
2625     }
2626     if (startcode) {
2627         s->prev_mb_info = put_bits_count(&s->pb)/8;
2628         /* This might have incremented mb_info_size above, and we return without
2629          * actually writing any info into that slot yet. But in that case,
2630          * this will be called again at the start of the after writing the
2631          * start code, actually writing the mb info. */
2632         return;
2633     }
2634
2635     s->last_mb_info = put_bits_count(&s->pb)/8;
2636     if (!s->mb_info_size)
2637         s->mb_info_size += 12;
2638     write_mb_info(s);
2639 }
2640
2641 static int encode_thread(AVCodecContext *c, void *arg){
2642     MpegEncContext *s= *(void**)arg;
2643     int mb_x, mb_y, pdif = 0;
2644     int chr_h= 16>>s->chroma_y_shift;
2645     int i, j;
2646     MpegEncContext best_s, backup_s;
2647     uint8_t bit_buf[2][MAX_MB_BYTES];
2648     uint8_t bit_buf2[2][MAX_MB_BYTES];
2649     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2650     PutBitContext pb[2], pb2[2], tex_pb[2];
2651
2652     ff_check_alignment();
2653
2654     for(i=0; i<2; i++){
2655         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2656         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2657         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2658     }
2659
2660     s->last_bits= put_bits_count(&s->pb);
2661     s->mv_bits=0;
2662     s->misc_bits=0;
2663     s->i_tex_bits=0;
2664     s->p_tex_bits=0;
2665     s->i_count=0;
2666     s->f_count=0;
2667     s->b_count=0;
2668     s->skip_count=0;
2669
2670     for(i=0; i<3; i++){
2671         /* init last dc values */
2672         /* note: quant matrix value (8) is implied here */
2673         s->last_dc[i] = 128 << s->intra_dc_precision;
2674
2675         s->current_picture.f->error[i] = 0;
2676     }
2677     if(s->codec_id==AV_CODEC_ID_AMV){
2678         s->last_dc[0] = 128*8/13;
2679         s->last_dc[1] = 128*8/14;
2680         s->last_dc[2] = 128*8/14;
2681     }
2682     s->mb_skip_run = 0;
2683     memset(s->last_mv, 0, sizeof(s->last_mv));
2684
2685     s->last_mv_dir = 0;
2686
2687     switch(s->codec_id){
2688     case AV_CODEC_ID_H263:
2689     case AV_CODEC_ID_H263P:
2690     case AV_CODEC_ID_FLV1:
2691         if (CONFIG_H263_ENCODER)
2692             s->gob_index = ff_h263_get_gob_height(s);
2693         break;
2694     case AV_CODEC_ID_MPEG4:
2695         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2696             ff_mpeg4_init_partitions(s);
2697         break;
2698     }
2699
2700     s->resync_mb_x=0;
2701     s->resync_mb_y=0;
2702     s->first_slice_line = 1;
2703     s->ptr_lastgob = s->pb.buf;
2704     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2705         s->mb_x=0;
2706         s->mb_y= mb_y;
2707
2708         ff_set_qscale(s, s->qscale);
2709         ff_init_block_index(s);
2710
2711         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2712             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2713             int mb_type= s->mb_type[xy];
2714 //            int d;
2715             int dmin= INT_MAX;
2716             int dir;
2717
2718             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2719                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2720                 return -1;
2721             }
2722             if(s->data_partitioning){
2723                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2724                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2725                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2726                     return -1;
2727                 }
2728             }
2729
2730             s->mb_x = mb_x;
2731             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2732             ff_update_block_index(s);
2733
2734             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2735                 ff_h261_reorder_mb_index(s);
2736                 xy= s->mb_y*s->mb_stride + s->mb_x;
2737                 mb_type= s->mb_type[xy];
2738             }
2739
2740             /* write gob / video packet header  */
2741             if(s->rtp_mode){
2742                 int current_packet_size, is_gob_start;
2743
2744                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2745
2746                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2747
2748                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2749
2750                 switch(s->codec_id){
2751                 case AV_CODEC_ID_H263:
2752                 case AV_CODEC_ID_H263P:
2753                     if(!s->h263_slice_structured)
2754                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2755                     break;
2756                 case AV_CODEC_ID_MPEG2VIDEO:
2757                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2758                 case AV_CODEC_ID_MPEG1VIDEO:
2759                     if(s->mb_skip_run) is_gob_start=0;
2760                     break;
2761                 case AV_CODEC_ID_MJPEG:
2762                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2763                     break;
2764                 }
2765
2766                 if(is_gob_start){
2767                     if(s->start_mb_y != mb_y || mb_x!=0){
2768                         write_slice_end(s);
2769
2770                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2771                             ff_mpeg4_init_partitions(s);
2772                         }
2773                     }
2774
2775                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2776                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2777
2778                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2779                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2780                         int d = 100 / s->error_rate;
2781                         if(r % d == 0){
2782                             current_packet_size=0;
2783                             s->pb.buf_ptr= s->ptr_lastgob;
2784                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2785                         }
2786                     }
2787
2788                     if (s->avctx->rtp_callback){
2789                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2790                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2791                     }
2792                     update_mb_info(s, 1);
2793
2794                     switch(s->codec_id){
2795                     case AV_CODEC_ID_MPEG4:
2796                         if (CONFIG_MPEG4_ENCODER) {
2797                             ff_mpeg4_encode_video_packet_header(s);
2798                             ff_mpeg4_clean_buffers(s);
2799                         }
2800                     break;
2801                     case AV_CODEC_ID_MPEG1VIDEO:
2802                     case AV_CODEC_ID_MPEG2VIDEO:
2803                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2804                             ff_mpeg1_encode_slice_header(s);
2805                             ff_mpeg1_clean_buffers(s);
2806                         }
2807                     break;
2808                     case AV_CODEC_ID_H263:
2809                     case AV_CODEC_ID_H263P:
2810                         if (CONFIG_H263_ENCODER)
2811                             ff_h263_encode_gob_header(s, mb_y);
2812                     break;
2813                     }
2814
2815                     if(s->flags&CODEC_FLAG_PASS1){
2816                         int bits= put_bits_count(&s->pb);
2817                         s->misc_bits+= bits - s->last_bits;
2818                         s->last_bits= bits;
2819                     }
2820
2821                     s->ptr_lastgob += current_packet_size;
2822                     s->first_slice_line=1;
2823                     s->resync_mb_x=mb_x;
2824                     s->resync_mb_y=mb_y;
2825                 }
2826             }
2827
2828             if(  (s->resync_mb_x   == s->mb_x)
2829                && s->resync_mb_y+1 == s->mb_y){
2830                 s->first_slice_line=0;
2831             }
2832
2833             s->mb_skipped=0;
2834             s->dquant=0; //only for QP_RD
2835
2836             update_mb_info(s, 0);
2837
2838             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2839                 int next_block=0;
2840                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2841
2842                 copy_context_before_encode(&backup_s, s, -1);
2843                 backup_s.pb= s->pb;
2844                 best_s.data_partitioning= s->data_partitioning;
2845                 best_s.partitioned_frame= s->partitioned_frame;
2846                 if(s->data_partitioning){
2847                     backup_s.pb2= s->pb2;
2848                     backup_s.tex_pb= s->tex_pb;
2849                 }
2850
2851                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2852                     s->mv_dir = MV_DIR_FORWARD;
2853                     s->mv_type = MV_TYPE_16X16;
2854                     s->mb_intra= 0;
2855                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2856                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2857                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2858                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2859                 }
2860                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2861                     s->mv_dir = MV_DIR_FORWARD;
2862                     s->mv_type = MV_TYPE_FIELD;
2863                     s->mb_intra= 0;
2864                     for(i=0; i<2; i++){
2865                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2866                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2867                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2868                     }
2869                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2870                                  &dmin, &next_block, 0, 0);
2871                 }
2872                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2873                     s->mv_dir = MV_DIR_FORWARD;
2874                     s->mv_type = MV_TYPE_16X16;
2875                     s->mb_intra= 0;
2876                     s->mv[0][0][0] = 0;
2877                     s->mv[0][0][1] = 0;
2878                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2879                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2880                 }
2881                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2882                     s->mv_dir = MV_DIR_FORWARD;
2883                     s->mv_type = MV_TYPE_8X8;
2884                     s->mb_intra= 0;
2885                     for(i=0; i<4; i++){
2886                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2887                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2888                     }
2889                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2890                                  &dmin, &next_block, 0, 0);
2891                 }
2892                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2893                     s->mv_dir = MV_DIR_FORWARD;
2894                     s->mv_type = MV_TYPE_16X16;
2895                     s->mb_intra= 0;
2896                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2897                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2898                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2899                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2900                 }
2901                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2902                     s->mv_dir = MV_DIR_BACKWARD;
2903                     s->mv_type = MV_TYPE_16X16;
2904                     s->mb_intra= 0;
2905                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2906                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2907                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2908                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2909                 }
2910                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2911                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2912                     s->mv_type = MV_TYPE_16X16;
2913                     s->mb_intra= 0;
2914                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2915                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2916                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2917                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2918                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2919                                  &dmin, &next_block, 0, 0);
2920                 }
2921                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2922                     s->mv_dir = MV_DIR_FORWARD;
2923                     s->mv_type = MV_TYPE_FIELD;
2924                     s->mb_intra= 0;
2925                     for(i=0; i<2; i++){
2926                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2927                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2928                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2929                     }
2930                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2931                                  &dmin, &next_block, 0, 0);
2932                 }
2933                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2934                     s->mv_dir = MV_DIR_BACKWARD;
2935                     s->mv_type = MV_TYPE_FIELD;
2936                     s->mb_intra= 0;
2937                     for(i=0; i<2; i++){
2938                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2939                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2940                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2941                     }
2942                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2943                                  &dmin, &next_block, 0, 0);
2944                 }
2945                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2946                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2947                     s->mv_type = MV_TYPE_FIELD;
2948                     s->mb_intra= 0;
2949                     for(dir=0; dir<2; dir++){
2950                         for(i=0; i<2; i++){
2951                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2952                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2953                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2954                         }
2955                     }
2956                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2957                                  &dmin, &next_block, 0, 0);
2958                 }
2959                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2960                     s->mv_dir = 0;
2961                     s->mv_type = MV_TYPE_16X16;
2962                     s->mb_intra= 1;
2963                     s->mv[0][0][0] = 0;
2964                     s->mv[0][0][1] = 0;
2965                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2966                                  &dmin, &next_block, 0, 0);
2967                     if(s->h263_pred || s->h263_aic){
2968                         if(best_s.mb_intra)
2969                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2970                         else
2971                             ff_clean_intra_table_entries(s); //old mode?
2972                     }
2973                 }
2974
2975                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2976                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2977                         const int last_qp= backup_s.qscale;
2978                         int qpi, qp, dc[6];
2979                         int16_t ac[6][16];
2980                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2981                         static const int dquant_tab[4]={-1,1,-2,2};
2982                         int storecoefs = s->mb_intra && s->dc_val[0];
2983
2984                         av_assert2(backup_s.dquant == 0);
2985
2986                         //FIXME intra
2987                         s->mv_dir= best_s.mv_dir;
2988                         s->mv_type = MV_TYPE_16X16;
2989                         s->mb_intra= best_s.mb_intra;
2990                         s->mv[0][0][0] = best_s.mv[0][0][0];
2991                         s->mv[0][0][1] = best_s.mv[0][0][1];
2992                         s->mv[1][0][0] = best_s.mv[1][0][0];
2993                         s->mv[1][0][1] = best_s.mv[1][0][1];
2994
2995                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2996                         for(; qpi<4; qpi++){
2997                             int dquant= dquant_tab[qpi];
2998                             qp= last_qp + dquant;
2999                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3000                                 continue;
3001                             backup_s.dquant= dquant;
3002                             if(storecoefs){
3003                                 for(i=0; i<6; i++){
3004                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3005                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3006                                 }
3007                             }
3008
3009                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3010                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3011                             if(best_s.qscale != qp){
3012                                 if(storecoefs){
3013                                     for(i=0; i<6; i++){
3014                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3015                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3016                                     }
3017                                 }
3018                             }
3019                         }
3020                     }
3021                 }
3022                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3023                     int mx= s->b_direct_mv_table[xy][0];
3024                     int my= s->b_direct_mv_table[xy][1];
3025
3026                     backup_s.dquant = 0;
3027                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3028                     s->mb_intra= 0;
3029                     ff_mpeg4_set_direct_mv(s, mx, my);
3030                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3031                                  &dmin, &next_block, mx, my);
3032                 }
3033                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3034                     backup_s.dquant = 0;
3035                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3036                     s->mb_intra= 0;
3037                     ff_mpeg4_set_direct_mv(s, 0, 0);
3038                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3039                                  &dmin, &next_block, 0, 0);
3040                 }
3041                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3042                     int coded=0;
3043                     for(i=0; i<6; i++)
3044                         coded |= s->block_last_index[i];
3045                     if(coded){
3046                         int mx,my;
3047                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3048                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3049                             mx=my=0; //FIXME find the one we actually used
3050                             ff_mpeg4_set_direct_mv(s, mx, my);
3051                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3052                             mx= s->mv[1][0][0];
3053                             my= s->mv[1][0][1];
3054                         }else{
3055                             mx= s->mv[0][0][0];
3056                             my= s->mv[0][0][1];
3057                         }
3058
3059                         s->mv_dir= best_s.mv_dir;
3060                         s->mv_type = best_s.mv_type;
3061                         s->mb_intra= 0;
3062 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3063                         s->mv[0][0][1] = best_s.mv[0][0][1];
3064                         s->mv[1][0][0] = best_s.mv[1][0][0];
3065                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3066                         backup_s.dquant= 0;
3067                         s->skipdct=1;
3068                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3069                                         &dmin, &next_block, mx, my);
3070                         s->skipdct=0;
3071                     }
3072                 }
3073
3074                 s->current_picture.qscale_table[xy] = best_s.qscale;
3075
3076                 copy_context_after_encode(s, &best_s, -1);
3077
3078                 pb_bits_count= put_bits_count(&s->pb);
3079                 flush_put_bits(&s->pb);
3080                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3081                 s->pb= backup_s.pb;
3082
3083                 if(s->data_partitioning){
3084                     pb2_bits_count= put_bits_count(&s->pb2);
3085                     flush_put_bits(&s->pb2);
3086                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3087                     s->pb2= backup_s.pb2;
3088
3089                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3090                     flush_put_bits(&s->tex_pb);
3091                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3092                     s->tex_pb= backup_s.tex_pb;
3093                 }
3094                 s->last_bits= put_bits_count(&s->pb);
3095
3096                 if (CONFIG_H263_ENCODER &&
3097                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3098                     ff_h263_update_motion_val(s);
3099
3100                 if(next_block==0){ //FIXME 16 vs linesize16
3101                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3102                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3103                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3104                 }
3105
3106                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3107                     ff_MPV_decode_mb(s, s->block);
3108             } else {
3109                 int motion_x = 0, motion_y = 0;
3110                 s->mv_type=MV_TYPE_16X16;
3111                 // only one MB-Type possible
3112
3113                 switch(mb_type){
3114                 case CANDIDATE_MB_TYPE_INTRA:
3115                     s->mv_dir = 0;
3116                     s->mb_intra= 1;
3117                     motion_x= s->mv[0][0][0] = 0;
3118                     motion_y= s->mv[0][0][1] = 0;
3119                     break;
3120                 case CANDIDATE_MB_TYPE_INTER:
3121                     s->mv_dir = MV_DIR_FORWARD;
3122                     s->mb_intra= 0;
3123                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3124                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3125                     break;
3126                 case CANDIDATE_MB_TYPE_INTER_I:
3127                     s->mv_dir = MV_DIR_FORWARD;
3128                     s->mv_type = MV_TYPE_FIELD;
3129                     s->mb_intra= 0;
3130                     for(i=0; i<2; i++){
3131                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3132                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3133                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3134                     }
3135                     break;
3136                 case CANDIDATE_MB_TYPE_INTER4V:
3137                     s->mv_dir = MV_DIR_FORWARD;
3138                     s->mv_type = MV_TYPE_8X8;
3139                     s->mb_intra= 0;
3140                     for(i=0; i<4; i++){
3141                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3142                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3143                     }
3144                     break;
3145                 case CANDIDATE_MB_TYPE_DIRECT:
3146                     if (CONFIG_MPEG4_ENCODER) {
3147                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3148                         s->mb_intra= 0;
3149                         motion_x=s->b_direct_mv_table[xy][0];
3150                         motion_y=s->b_direct_mv_table[xy][1];
3151                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3152                     }
3153                     break;
3154                 case CANDIDATE_MB_TYPE_DIRECT0:
3155                     if (CONFIG_MPEG4_ENCODER) {
3156                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3157                         s->mb_intra= 0;
3158                         ff_mpeg4_set_direct_mv(s, 0, 0);
3159                     }
3160                     break;
3161                 case CANDIDATE_MB_TYPE_BIDIR:
3162                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3163                     s->mb_intra= 0;
3164                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3165                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3166                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3167                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3168                     break;
3169                 case CANDIDATE_MB_TYPE_BACKWARD:
3170                     s->mv_dir = MV_DIR_BACKWARD;
3171                     s->mb_intra= 0;
3172                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3173                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3174                     break;
3175                 case CANDIDATE_MB_TYPE_FORWARD:
3176                     s->mv_dir = MV_DIR_FORWARD;
3177                     s->mb_intra= 0;
3178                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3179                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3180                     break;
3181                 case CANDIDATE_MB_TYPE_FORWARD_I:
3182                     s->mv_dir = MV_DIR_FORWARD;
3183                     s->mv_type = MV_TYPE_FIELD;
3184                     s->mb_intra= 0;
3185                     for(i=0; i<2; i++){
3186                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3187                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3188                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3189                     }
3190                     break;
3191                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3192                     s->mv_dir = MV_DIR_BACKWARD;
3193                     s->mv_type = MV_TYPE_FIELD;
3194                     s->mb_intra= 0;
3195                     for(i=0; i<2; i++){
3196                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3197                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3198                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3199                     }
3200                     break;
3201                 case CANDIDATE_MB_TYPE_BIDIR_I:
3202                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3203                     s->mv_type = MV_TYPE_FIELD;
3204                     s->mb_intra= 0;
3205                     for(dir=0; dir<2; dir++){
3206                         for(i=0; i<2; i++){
3207                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3208                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3209                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3210                         }
3211                     }
3212                     break;
3213                 default:
3214                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3215                 }
3216
3217                 encode_mb(s, motion_x, motion_y);
3218
3219                 // RAL: Update last macroblock type
3220                 s->last_mv_dir = s->mv_dir;
3221
3222                 if (CONFIG_H263_ENCODER &&
3223                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3224                     ff_h263_update_motion_val(s);
3225
3226                 ff_MPV_decode_mb(s, s->block);
3227             }
3228
3229             /* clean the MV table in IPS frames for direct mode in B frames */
3230             if(s->mb_intra /* && I,P,S_TYPE */){
3231                 s->p_mv_table[xy][0]=0;
3232                 s->p_mv_table[xy][1]=0;
3233             }
3234
3235             if(s->flags&CODEC_FLAG_PSNR){
3236                 int w= 16;
3237                 int h= 16;
3238
3239                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3240                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3241
3242                 s->current_picture.f->error[0] += sse(
3243                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3244                     s->dest[0], w, h, s->linesize);
3245                 s->current_picture.f->error[1] += sse(
3246                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3247                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3248                 s->current_picture.f->error[2] += sse(
3249                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3250                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3251             }
3252             if(s->loop_filter){
3253                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3254                     ff_h263_loop_filter(s);
3255             }
3256             av_dlog(s->avctx, "MB %d %d bits\n",
3257                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3258         }
3259     }
3260
3261     //not beautiful here but we must write it before flushing so it has to be here
3262     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3263         ff_msmpeg4_encode_ext_header(s);
3264
3265     write_slice_end(s);
3266
3267     /* Send the last GOB if RTP */
3268     if (s->avctx->rtp_callback) {
3269         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3270         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3271         /* Call the RTP callback to send the last GOB */
3272         emms_c();
3273         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3274     }
3275
3276     return 0;
3277 }
3278
/*
 * Add src->field to dst->field, then reset src->field to 0.
 *
 * Relies on pointers named `dst` and `src` being in scope at the point of use.
 * The body is wrapped in do { } while (0) so that both statements stay
 * together when the macro is used as the body of an unbraced if/else/loop;
 * invoke it as a statement, i.e. followed by a semicolon.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3280 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3281     MERGE(me.scene_change_score);
3282     MERGE(me.mc_mb_var_sum_temp);
3283     MERGE(me.mb_var_sum_temp);
3284 }
3285
3286 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3287     int i;
3288
3289     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3290     MERGE(dct_count[1]);
3291     MERGE(mv_bits);
3292     MERGE(i_tex_bits);
3293     MERGE(p_tex_bits);
3294     MERGE(i_count);
3295     MERGE(f_count);
3296     MERGE(b_count);
3297     MERGE(skip_count);
3298     MERGE(misc_bits);
3299     MERGE(er.error_count);
3300     MERGE(padding_bug_score);
3301     MERGE(current_picture.f->error[0]);
3302     MERGE(current_picture.f->error[1]);
3303     MERGE(current_picture.f->error[2]);
3304
3305     if(dst->avctx->noise_reduction){
3306         for(i=0; i<64; i++){
3307             MERGE(dct_error_sum[0][i]);
3308             MERGE(dct_error_sum[1][i]);
3309         }
3310     }
3311
3312     assert(put_bits_count(&src->pb) % 8 ==0);
3313     assert(put_bits_count(&dst->pb) % 8 ==0);
3314     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3315     flush_put_bits(&dst->pb);
3316 }
3317
3318 static int estimate_qp(MpegEncContext *s, int dry_run){
3319     if (s->next_lambda){
3320         s->current_picture_ptr->f->quality =
3321         s->current_picture.f->quality = s->next_lambda;
3322         if(!dry_run) s->next_lambda= 0;
3323     } else if (!s->fixed_qscale) {
3324         s->current_picture_ptr->f->quality =
3325         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3326         if (s->current_picture.f->quality < 0)
3327             return -1;
3328     }
3329
3330     if(s->adaptive_quant){
3331         switch(s->codec_id){
3332         case AV_CODEC_ID_MPEG4:
3333             if (CONFIG_MPEG4_ENCODER)
3334                 ff_clean_mpeg4_qscales(s);
3335             break;
3336         case AV_CODEC_ID_H263:
3337         case AV_CODEC_ID_H263P:
3338         case AV_CODEC_ID_FLV1:
3339             if (CONFIG_H263_ENCODER)
3340                 ff_clean_h263_qscales(s);
3341             break;
3342         default:
3343             ff_init_qscale_tab(s);
3344         }
3345
3346         s->lambda= s->lambda_table[0];
3347         //FIXME broken
3348     }else
3349         s->lambda = s->current_picture.f->quality;
3350     update_qscale(s);
3351     return 0;
3352 }
3353
3354 /* must be called before writing the header */
3355 static void set_frame_distances(MpegEncContext * s){
3356     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3357     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3358
3359     if(s->pict_type==AV_PICTURE_TYPE_B){
3360         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3361         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3362     }else{
3363         s->pp_time= s->time - s->last_non_b_time;
3364         s->last_non_b_time= s->time;
3365         assert(s->picture_number==0 || s->pp_time > 0);
3366     }
3367 }
3368
/**
 * Encode the current picture.
 *
 * Stages, in order: per-picture timing and rounding setup, initial lambda
 * selection, motion estimation (or intra marking for I pictures) run over the
 * slice contexts, optional promotion of a P picture to an I picture on a
 * scene change, f_code/b_code selection with clipping of too long motion
 * vectors, final quality estimation, MJPEG/AMV quantisation matrix setup,
 * picture header writing and finally the encode_thread jobs, whose results
 * are merged back into s.
 *
 * @param s              encoder context; also the context that the other
 *                       slice contexts are merged into
 * @param picture_number stored in s->picture_number and passed on to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails, or the
 *         negative value returned by ff_update_duplicate_context()
 */
3369 static int encode_picture(MpegEncContext *s, int picture_number)
3370 {
3371     int i, ret;
3372     int bits;
3373     int context_count = s->slice_context_count;
3374
3375     s->picture_number = picture_number;
3376
3377     /* Reset the average MB variance */
3378     s->me.mb_var_sum_temp    =
3379     s->me.mc_mb_var_sum_temp = 0;
3380
3381     /* we need to initialize some time vars before we can encode b-frames */
3382     // RAL: Condition added for MPEG1VIDEO
3383     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3384         set_frame_distances(s);
3385     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3386         ff_set_mpeg4_time(s);
3387
3388     s->me.scene_change_score=0;
3389
3390 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3391
         /* I pictures set no_rounding from the msmpeg4 version; other non-B
          * pictures toggle it for flipflop rounding, H.263+ and MPEG-4;
          * B pictures leave it unchanged */
3392     if(s->pict_type==AV_PICTURE_TYPE_I){
3393         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3394         else                        s->no_rounding=0;
3395     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3396         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3397             s->no_rounding ^= 1;
3398     }
3399
         /* initial lambda: a dry-run quality estimate in the second pass,
          * otherwise (unless CODEC_FLAG_QSCALE is set) the last lambda stored
          * for the relevant picture type */
3400     if(s->flags & CODEC_FLAG_PASS2){
3401         if (estimate_qp(s,1) < 0)
3402             return -1;
3403         ff_get_2pass_fcode(s);
3404     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3405         if(s->pict_type==AV_PICTURE_TYPE_B)
3406             s->lambda= s->last_lambda_for[s->pict_type];
3407         else
3408             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3409         update_qscale(s);
3410     }
3411
         /* for every codec except AMV and MJPEG the chroma intra quantiser
          * tables alias the luma ones; tables that differ are freed first */
3412     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3413         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3414         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3415         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3416         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3417     }
3418
3419     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* bring the other slice contexts (index 1 and up) in sync with s */
3420     for(i=1; i<context_count; i++){
3421         ret = ff_update_duplicate_context(s->thread_context[i], s);
3422         if (ret < 0)
3423             return ret;
3424     }
3425
3426     if(ff_init_me(s)<0)
3427         return -1;
3428
3429     /* Estimate motion for every MB */
3430     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda/lambda2 by me_penalty_compensation/256, rounded */
3431         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3432         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3433         if (s->pict_type != AV_PICTURE_TYPE_B) {
                 /* pre-pass: when pre_me is 2, or when pre_me is non-zero and
                  * the last non-B picture was an I picture */
3434             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3435                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3436             }
3437         }
3438
3439         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3440     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3441         /* I-Frame */
3442         for(i=0; i<s->mb_stride*s->mb_height; i++)
3443             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3444
3445         if(!s->fixed_qscale){
3446             /* finding spatial complexity for I-frame rate control */
3447             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3448         }
3449     }
         /* collect the statistics gathered by the other contexts into s */
3450     for(i=1; i<context_count; i++){
3451         merge_context_after_me(s, s->thread_context[i]);
3452     }
3453     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3454     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3455     emms_c();
3456
         /* scene change: a P picture whose score exceeds the threshold is
          * re-typed as an I picture with every macroblock marked intra */
3457     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3458         s->pict_type= AV_PICTURE_TYPE_I;
3459         for(i=0; i<s->mb_stride*s->mb_height; i++)
3460             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3461         if(s->msmpeg4_version >= 3)
3462             s->no_rounding=1;
3463         av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3464                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3465     }
3466
         /* unless umvplus is set: pick f_code/b_code from the motion vector
          * tables and let ff_fix_long_mvs() deal with vectors that do not fit */
3467     if(!s->umvplus){
3468         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3469             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3470
3471             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3472                 int a,b;
3473                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3474                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3475                 s->f_code= FFMAX3(s->f_code, a, b);
3476             }
3477
3478             ff_fix_long_p_mvs(s);
3479             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3480             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3481                 int j;
3482                 for(i=0; i<2; i++){
3483                     for(j=0; j<2; j++)
3484                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3485                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3486                 }
3487             }
3488         }
3489
3490         if(s->pict_type==AV_PICTURE_TYPE_B){
3491             int a, b;
3492
                 /* f_code covers the forward tables, b_code the backward ones */
3493             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3494             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3495             s->f_code = FFMAX(a, b);
3496
3497             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3498             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3499             s->b_code = FFMAX(a, b);
3500
3501             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3502             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3503             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3504             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3505             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3506                 int dir, j;
3507                 for(dir=0; dir<2; dir++){
3508                     for(i=0; i<2; i++){
3509                         for(j=0; j<2; j++){
3510                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3511                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3512                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3513                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3514                         }
3515                     }
3516                 }
3517             }
3518         }
3519     }
3520
         /* final (non dry-run) quality estimate for this picture */
3521     if (estimate_qp(s, 0) < 0)
3522         return -1;
3523
3524     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3525         s->qscale= 3; //reduce clipping problems
3526
3527     if (s->out_format == FMT_MJPEG) {
             /* start from the MPEG-1 default intra matrix; user supplied
              * matrices from avctx take precedence when present */
3528         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3529         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3530
3531         if (s->avctx->intra_matrix) {
3532             chroma_matrix =
3533             luma_matrix = s->avctx->intra_matrix;
3534         }
3535         if (s->avctx->chroma_intra_matrix)
3536             chroma_matrix = s->avctx->chroma_intra_matrix;
3537
3538         /* for mjpeg, we do include qscale in the matrix */
3539         for(i=1;i<64;i++){
3540             int j= s->dsp.idct_permutation[i];
3541
3542             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3543             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3544         }
3545         s->y_dc_scale_table=
3546         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3547         s->chroma_intra_matrix[0] =
3548         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3549         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3550                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3551         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3552                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrices above */
3553         s->qscale= 8;
3554     }
3555     if(s->codec_id == AV_CODEC_ID_AMV){
             /* AMV: constant DC scale tables (13 for luma, 14 for chroma) and
              * matrices taken from rows 5*2+0 / 5*2+1 of sp5x_quant_table */
3556         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3557         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3558         for(i=1;i<64;i++){
3559             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3560
3561             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3562             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3563         }
3564         s->y_dc_scale_table= y;
3565         s->c_dc_scale_table= c;
3566         s->intra_matrix[0] = 13;
3567         s->chroma_intra_matrix[0] = 14;
3568         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3569                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3570         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3571                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3572         s->qscale= 8;
3573     }
3574
3575     //FIXME var duplication
3576     s->current_picture_ptr->f->key_frame =
3577     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3578     s->current_picture_ptr->f->pict_type =
3579     s->current_picture.f->pict_type = s->pict_type;
3580
3581     if (s->current_picture.f->key_frame)
3582         s->picture_in_gop_number=0;
3583
3584     s->mb_x = s->mb_y = 0;
         /* remember the bit position so the header size can be computed below */
3585     s->last_bits= put_bits_count(&s->pb);
         /* write the picture header matching the output format / codec */
3586     switch(s->out_format) {
3587     case FMT_MJPEG:
3588         if (CONFIG_MJPEG_ENCODER)
3589             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3590                                            s->intra_matrix, s->chroma_intra_matrix);
3591         break;
3592     case FMT_H261:
3593         if (CONFIG_H261_ENCODER)
3594             ff_h261_encode_picture_header(s, picture_number);
3595         break;
3596     case FMT_H263:
3597         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3598             ff_wmv2_encode_picture_header(s, picture_number);
3599         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3600             ff_msmpeg4_encode_picture_header(s, picture_number);
3601         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3602             ff_mpeg4_encode_picture_header(s, picture_number);
3603         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3604             ff_rv10_encode_picture_header(s, picture_number);
3605         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3606             ff_rv20_encode_picture_header(s, picture_number);
3607         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3608             ff_flv_encode_picture_header(s, picture_number);
3609         else if (CONFIG_H263_ENCODER)
3610             ff_h263_encode_picture_header(s, picture_number);
3611         break;
3612     case FMT_MPEG1:
3613         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3614             ff_mpeg1_encode_picture_header(s, picture_number);
3615         break;
3616     default:
3617         av_assert0(0);
3618     }
3619     bits= put_bits_count(&s->pb);
3620     s->header_bits= bits - s->last_bits;
3621
         /* refresh the other contexts, run encode_thread over all of them,
          * then merge their results back into s */
3622     for(i=1; i<context_count; i++){
3623         update_duplicate_context_after_me(s->thread_context[i], s);
3624     }
3625     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3626     for(i=1; i<context_count; i++){
3627         merge_context_after_encode(s, s->thread_context[i]);
3628     }
3629     emms_c();
3630     return 0;
3631 }
3632
3633 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3634     const int intra= s->mb_intra;
3635     int i;
3636
3637     s->dct_count[intra]++;
3638
3639     for(i=0; i<64; i++){
3640         int level= block[i];
3641
3642         if(level){
3643             if(level>0){
3644                 s->dct_error_sum[intra][i] += level;
3645                 level -= s->dct_offset[intra][i];
3646                 if(level<0) level=0;
3647             }else{
3648                 s->dct_error_sum[intra][i] -= level;
3649                 level += s->dct_offset[intra][i];
3650                 if(level>0) level=0;
3651             }
3652             block[i]= level;
3653         }
3654     }
3655 }
3656
3657 static int dct_quantize_trellis_c(MpegEncContext *s,
3658                                   int16_t *block, int n,
3659                                   int qscale, int *overflow){
3660     const int *qmat;
3661     const uint8_t *scantable= s->intra_scantable.scantable;
3662     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3663     int max=0;
3664     unsigned int threshold1, threshold2;
3665     int bias=0;
3666     int run_tab[65];
3667     int level_tab[65];
3668     int score_tab[65];
3669     int survivor[65];
3670     int survivor_count;
3671     int last_run=0;
3672     int last_level=0;
3673     int last_score= 0;
3674     int last_i;
3675     int coeff[2][64];
3676     int coeff_count[64];
3677     int qmul, qadd, start_i, last_non_zero, i, dc;
3678     const int esc_length= s->ac_esc_length;
3679     uint8_t * length;
3680     uint8_t * last_length;
3681     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3682
3683     s->dsp.fdct (block);
3684
3685     if(s->dct_error_sum)
3686         s->denoise_dct(s, block);
3687     qmul= qscale*16;
3688     qadd= ((qscale-1)|1)*8;
3689
3690     if (s->mb_intra) {
3691         int q;
3692         if (!s->h263_aic) {
3693             if (n < 4)
3694                 q = s->y_dc_scale;
3695             else
3696                 q = s->c_dc_scale;
3697             q = q << 3;
3698         } else{
3699             /* For AIC we skip quant/dequant of INTRADC */
3700             q = 1 << 3;
3701             qadd=0;
3702         }
3703
3704         /* note: block[0] is assumed to be positive */
3705         block[0] = (block[0] + (q >> 1)) / q;
3706         start_i = 1;
3707         last_non_zero = 0;
3708         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3709         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3710             bias= 1<<(QMAT_SHIFT-1);
3711         length     = s->intra_ac_vlc_length;
3712         last_length= s->intra_ac_vlc_last_length;
3713     } else {
3714         start_i = 0;
3715         last_non_zero = -1;
3716         qmat = s->q_inter_matrix[qscale];
3717         length     = s->inter_ac_vlc_length;
3718         last_length= s->inter_ac_vlc_last_length;
3719     }
3720     last_i= start_i;
3721
3722     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3723     threshold2= (threshold1<<1);
3724
3725     for(i=63; i>=start_i; i--) {
3726         const int j = scantable[i];
3727         int level = block[j] * qmat[j];
3728
3729         if(((unsigned)(level+threshold1))>threshold2){
3730             last_non_zero = i;
3731             break;
3732         }
3733     }
3734
3735     for(i=start_i; i<=last_non_zero; i++) {
3736         const int j = scantable[i];
3737         int level = block[j] * qmat[j];
3738
3739 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3740 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3741         if(((unsigned)(level+threshold1))>threshold2){
3742             if(level>0){
3743                 level= (bias + level)>>QMAT_SHIFT;
3744                 coeff[0][i]= level;
3745                 coeff[1][i]= level-1;
3746 //                coeff[2][k]= level-2;
3747             }else{
3748                 level= (bias - level)>>QMAT_SHIFT;
3749                 coeff[0][i]= -level;
3750                 coeff[1][i]= -level+1;
3751 //                coeff[2][k]= -level+2;
3752             }
3753             coeff_count[i]= FFMIN(level, 2);
3754             av_assert2(coeff_count[i]);
3755             max |=level;
3756         }else{
3757             coeff[0][i]= (level>>31)|1;
3758             coeff_count[i]= 1;
3759         }
3760     }
3761
3762     *overflow= s->max_qcoeff < max; //overflow might have happened
3763
3764     if(last_non_zero < start_i){
3765         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3766         return last_non_zero;
3767     }
3768
3769     score_tab[start_i]= 0;
3770     survivor[0]= start_i;
3771     survivor_count= 1;
3772
3773     for(i=start_i; i<=last_non_zero; i++){
3774         int level_index, j, zero_distortion;
3775         int dct_coeff= FFABS(block[ scantable[i] ]);
3776         int best_score=256*256*256*120;
3777
3778         if (s->dsp.fdct == ff_fdct_ifast)
3779             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3780         zero_distortion= dct_coeff*dct_coeff;
3781
3782         for(level_index=0; level_index < coeff_count[i]; level_index++){
3783             int distortion;
3784             int level= coeff[level_index][i];
3785             const int alevel= FFABS(level);
3786             int unquant_coeff;
3787
3788             av_assert2(level);
3789
3790             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3791                 unquant_coeff= alevel*qmul + qadd;
3792             }else{ //MPEG1
3793                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3794                 if(s->mb_intra){
3795                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3796                         unquant_coeff =   (unquant_coeff - 1) | 1;
3797                 }else{
3798                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3799                         unquant_coeff =   (unquant_coeff - 1) | 1;
3800                 }
3801                 unquant_coeff<<= 3;
3802             }
3803
3804             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3805             level+=64;
3806             if((level&(~127)) == 0){
3807                 for(j=survivor_count-1; j>=0; j--){
3808                     int run= i - survivor[j];
3809                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3810                     score += score_tab[i-run];
3811
3812                     if(score < best_score){
3813                         best_score= score;
3814                         run_tab[i+1]= run;
3815                         level_tab[i+1]= level-64;
3816                     }
3817                 }
3818
3819                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3820                     for(j=survivor_count-1; j>=0; j--){
3821                         int run= i - survivor[j];
3822                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3823                         score += score_tab[i-run];
3824                         if(score < last_score){
3825                             last_score= score;
3826                             last_run= run;
3827                             last_level= level-64;
3828                             last_i= i+1;
3829                         }
3830                     }
3831                 }
3832             }else{
3833                 distortion += esc_length*lambda;
3834                 for(j=survivor_count-1; j>=0; j--){
3835                     int run= i - survivor[j];
3836                     int score= distortion + score_tab[i-run];
3837
3838                     if(score < best_score){
3839                         best_score= score;
3840                         run_tab[i+1]= run;
3841                         level_tab[i+1]= level-64;
3842                     }
3843                 }
3844
3845                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3846                   for(j=survivor_count-1; j>=0; j--){
3847                         int run= i - survivor[j];
3848                         int score= distortion + score_tab[i-run];
3849                         if(score < last_score){
3850                             last_score= score;
3851                             last_run= run;
3852                             last_level= level-64;
3853                             last_i= i+1;
3854                         }
3855                     }
3856                 }
3857             }
3858         }
3859
3860         score_tab[i+1]= best_score;
3861
3862         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3863         if(last_non_zero <= 27){
3864             for(; survivor_count; survivor_count--){
3865                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3866                     break;
3867             }
3868         }else{
3869             for(; survivor_count; survivor_count--){
3870                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3871                     break;
3872             }
3873         }
3874
3875         survivor[ survivor_count++ ]= i+1;
3876     }
3877
3878     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3879         last_score= 256*256*256*120;
3880         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3881             int score= score_tab[i];
3882             if(i) score += lambda*2; //FIXME exacter?
3883
3884             if(score < last_score){
3885                 last_score= score;
3886                 last_i= i;
3887                 last_level= level_tab[i];
3888                 last_run= run_tab[i];
3889             }
3890         }
3891     }
3892
3893     s->coded_score[n] = last_score;
3894
3895     dc= FFABS(block[0]);
3896     last_non_zero= last_i - 1;
3897     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3898
3899     if(last_non_zero < start_i)
3900         return last_non_zero;
3901
3902     if(last_non_zero == 0 && start_i == 0){
3903         int best_level= 0;
3904         int best_score= dc * dc;
3905
3906         for(i=0; i<coeff_count[0]; i++){
3907             int level= coeff[i][0];
3908             int alevel= FFABS(level);
3909             int unquant_coeff, score, distortion;
3910
3911             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3912                     unquant_coeff= (alevel*qmul + qadd)>>3;
3913             }else{ //MPEG1
3914                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3915                     unquant_coeff =   (unquant_coeff - 1) | 1;
3916             }
3917             unquant_coeff = (unquant_coeff + 4) >> 3;
3918             unquant_coeff<<= 3 + 3;
3919
3920             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3921             level+=64;
3922             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3923             else                    score= distortion + esc_length*lambda;
3924
3925             if(score < best_score){
3926                 best_score= score;
3927                 best_level= level - 64;
3928             }
3929         }
3930         block[0]= best_level;
3931         s->coded_score[n] = best_score - dc*dc;
3932         if(best_level == 0) return -1;
3933         else                return last_non_zero;
3934     }
3935
3936     i= last_i;
3937     av_assert2(last_level);
3938
3939     block[ perm_scantable[last_non_zero] ]= last_level;
3940     i -= last_run + 1;
3941
3942     for(; i>start_i; i -= run_tab[i] + 1){
3943         block[ perm_scantable[i-1] ]= level_tab[i];
3944     }
3945
3946     return last_non_zero;
3947 }
3948
3949 //#define REFINE_STATS 1
3950 static int16_t basis[64][64];
3951
3952 static void build_basis(uint8_t *perm){
3953     int i, j, x, y;
3954     emms_c();
3955     for(i=0; i<8; i++){
3956         for(j=0; j<8; j++){
3957             for(y=0; y<8; y++){
3958                 for(x=0; x<8; x++){
3959                     double s= 0.25*(1<<BASIS_SHIFT);
3960                     int index= 8*i + j;
3961                     int perm_index= perm[index];
3962                     if(i==0) s*= sqrt(0.5);
3963                     if(j==0) s*= sqrt(0.5);
3964                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3965                 }
3966             }
3967         }
3968     }
3969 }
3970
3971 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3972                         int16_t *block, int16_t *weight, int16_t *orig,
3973                         int n, int qscale){
3974     int16_t rem[64];
3975     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3976     const uint8_t *scantable= s->intra_scantable.scantable;
3977     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3978 //    unsigned int threshold1, threshold2;
3979 //    int bias=0;
3980     int run_tab[65];
3981     int prev_run=0;
3982     int prev_level=0;
3983     int qmul, qadd, start_i, last_non_zero, i, dc;
3984     uint8_t * length;
3985     uint8_t * last_length;
3986     int lambda;
3987     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3988 #ifdef REFINE_STATS
3989 static int count=0;
3990 static int after_last=0;
3991 static int to_zero=0;
3992 static int from_zero=0;
3993 static int raise=0;
3994 static int lower=0;
3995 static int messed_sign=0;
3996 #endif
3997
3998     if(basis[0][0] == 0)
3999         build_basis(s->dsp.idct_permutation);
4000
4001     qmul= qscale*2;
4002     qadd= (qscale-1)|1;
4003     if (s->mb_intra) {
4004         if (!s->h263_aic) {
4005             if (n < 4)
4006                 q = s->y_dc_scale;
4007             else
4008                 q = s->c_dc_scale;
4009         } else{
4010             /* For AIC we skip quant/dequant of INTRADC */
4011             q = 1;
4012             qadd=0;
4013         }
4014         q <<= RECON_SHIFT-3;
4015         /* note: block[0] is assumed to be positive */
4016         dc= block[0]*q;
4017 //        block[0] = (block[0] + (q >> 1)) / q;
4018         start_i = 1;
4019 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4020 //            bias= 1<<(QMAT_SHIFT-1);
4021         length     = s->intra_ac_vlc_length;
4022         last_length= s->intra_ac_vlc_last_length;
4023     } else {
4024         dc= 0;
4025         start_i = 0;
4026         length     = s->inter_ac_vlc_length;
4027         last_length= s->inter_ac_vlc_last_length;
4028     }
4029     last_non_zero = s->block_last_index[n];
4030
4031 #ifdef REFINE_STATS
4032 {START_TIMER
4033 #endif
4034     dc += (1<<(RECON_SHIFT-1));
4035     for(i=0; i<64; i++){
4036         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4037     }
4038 #ifdef REFINE_STATS
4039 STOP_TIMER("memset rem[]")}
4040 #endif
4041     sum=0;
4042     for(i=0; i<64; i++){
4043         int one= 36;
4044         int qns=4;
4045         int w;
4046
4047         w= FFABS(weight[i]) + qns*one;
4048         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4049
4050         weight[i] = w;
4051 //        w=weight[i] = (63*qns + (w/2)) / w;
4052
4053         av_assert2(w>0);
4054         av_assert2(w<(1<<6));
4055         sum += w*w;
4056     }
4057     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4058 #ifdef REFINE_STATS
4059 {START_TIMER
4060 #endif
4061     run=0;
4062     rle_index=0;
4063     for(i=start_i; i<=last_non_zero; i++){
4064         int j= perm_scantable[i];
4065         const int level= block[j];
4066         int coeff;
4067
4068         if(level){
4069             if(level<0) coeff= qmul*level - qadd;
4070             else        coeff= qmul*level + qadd;
4071             run_tab[rle_index++]=run;
4072             run=0;
4073
4074             s->dsp.add_8x8basis(rem, basis[j], coeff);
4075         }else{
4076             run++;
4077         }
4078     }
4079 #ifdef REFINE_STATS
4080 if(last_non_zero>0){
4081 STOP_TIMER("init rem[]")
4082 }
4083 }
4084
4085 {START_TIMER
4086 #endif
4087     for(;;){
4088         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
4089         int best_coeff=0;
4090         int best_change=0;
4091         int run2, best_unquant_change=0, analyze_gradient;
4092 #ifdef REFINE_STATS
4093 {START_TIMER
4094 #endif
4095         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4096
4097         if(analyze_gradient){
4098 #ifdef REFINE_STATS
4099 {START_TIMER
4100 #endif
4101             for(i=0; i<64; i++){
4102                 int w= weight[i];
4103
4104                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4105             }
4106 #ifdef REFINE_STATS
4107 STOP_TIMER("rem*w*w")}
4108 {START_TIMER
4109 #endif
4110             s->dsp.fdct(d1);
4111 #ifdef REFINE_STATS
4112 STOP_TIMER("dct")}
4113 #endif
4114         }
4115
4116         if(start_i){
4117             const int level= block[0];
4118             int change, old_coeff;
4119
4120             av_assert2(s->mb_intra);
4121
4122             old_coeff= q*level;
4123
4124             for(change=-1; change<=1; change+=2){
4125                 int new_level= level + change;
4126                 int score, new_coeff;
4127
4128                 new_coeff= q*new_level;
4129                 if(new_coeff >= 2048 || new_coeff < 0)
4130                     continue;
4131
4132                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
4133                 if(score<best_score){
4134                     best_score= score;
4135                     best_coeff= 0;
4136                     best_change= change;
4137                     best_unquant_change= new_coeff - old_coeff;
4138                 }
4139             }
4140         }
4141
4142         run=0;
4143         rle_index=0;
4144         run2= run_tab[rle_index++];
4145         prev_level=0;
4146         prev_run=0;
4147
4148         for(i=start_i; i<64; i++){
4149             int j= perm_scantable[i];
4150             const int level= block[j];
4151             int change, old_coeff;
4152
4153             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4154                 break;
4155
4156             if(level){
4157                 if(level<0) old_coeff= qmul*level - qadd;
4158                 else        old_coeff= qmul*level + qadd;
4159                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4160             }else{
4161                 old_coeff=0;
4162                 run2--;
4163                 av_assert2(run2>=0 || i >= last_non_zero );
4164             }
4165
4166             for(change=-1; change<=1; change+=2){
4167                 int new_level= level + change;
4168                 int score, new_coeff, unquant_change;
4169
4170                 score=0;
4171                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4172                    continue;
4173
4174                 if(new_level){
4175                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4176                     else            new_coeff= qmul*new_level + qadd;
4177                     if(new_coeff >= 2048 || new_coeff <= -2048)
4178                         continue;
4179                     //FIXME check for overflow
4180
4181                     if(level){
4182                         if(level < 63 && level > -63){
4183                             if(i < last_non_zero)
4184                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4185                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4186                             else
4187                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4188                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4189                         }
4190                     }else{
4191                         av_assert2(FFABS(new_level)==1);
4192
4193                         if(analyze_gradient){
4194                             int g= d1[ scantable[i] ];
4195                             if(g && (g^new_level) >= 0)
4196                                 continue;
4197                         }
4198
4199                         if(i < last_non_zero){
4200                             int next_i= i + run2 + 1;
4201                             int next_level= block[ perm_scantable[next_i] ] + 64;
4202
4203                             if(next_level&(~127))
4204                                 next_level= 0;
4205
4206                             if(next_i < last_non_zero)
4207                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4208                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4209                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4210                             else
4211                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4212                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4213                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4214                         }else{
4215                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4216                             if(prev_level){
4217                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4218                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4219                             }
4220                         }
4221                     }
4222                 }else{
4223                     new_coeff=0;
4224                     av_assert2(FFABS(level)==1);
4225
4226                     if(i < last_non_zero){
4227                         int next_i= i + run2 + 1;
4228                         int next_level= block[ perm_scantable[next_i] ] + 64;
4229
4230                         if(next_level&(~127))
4231                             next_level= 0;
4232
4233                         if(next_i < last_non_zero)
4234                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4235                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4236                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4237                         else
4238                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4239                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4240                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4241                     }else{
4242                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4243                         if(prev_level){
4244                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4245                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4246                         }
4247                     }
4248                 }
4249
4250                 score *= lambda;
4251
4252                 unquant_change= new_coeff - old_coeff;
4253                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4254
4255                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4256                 if(score<best_score){
4257                     best_score= score;
4258                     best_coeff= i;
4259                     best_change= change;
4260                     best_unquant_change= unquant_change;
4261                 }
4262             }
4263             if(level){
4264                 prev_level= level + 64;
4265                 if(prev_level&(~127))
4266                     prev_level= 0;
4267                 prev_run= run;
4268                 run=0;
4269             }else{
4270                 run++;
4271             }
4272         }
4273 #ifdef REFINE_STATS
4274 STOP_TIMER("iterative step")}
4275 #endif
4276
4277         if(best_change){
4278             int j= perm_scantable[ best_coeff ];
4279
4280             block[j] += best_change;
4281
4282             if(best_coeff > last_non_zero){
4283                 last_non_zero= best_coeff;
4284                 av_assert2(block[j]);
4285 #ifdef REFINE_STATS
4286 after_last++;
4287 #endif
4288             }else{
4289 #ifdef REFINE_STATS
4290 if(block[j]){
4291     if(block[j] - best_change){
4292         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4293             raise++;
4294         }else{
4295             lower++;
4296         }
4297     }else{
4298         from_zero++;
4299     }
4300 }else{
4301     to_zero++;
4302 }
4303 #endif
4304                 for(; last_non_zero>=start_i; last_non_zero--){
4305                     if(block[perm_scantable[last_non_zero]])
4306                         break;
4307                 }
4308             }
4309 #ifdef REFINE_STATS
4310 count++;
4311 if(256*256*256*64 % count == 0){
4312     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4313 }
4314 #endif
4315             run=0;
4316             rle_index=0;
4317             for(i=start_i; i<=last_non_zero; i++){
4318                 int j= perm_scantable[i];
4319                 const int level= block[j];
4320
4321                  if(level){
4322                      run_tab[rle_index++]=run;
4323                      run=0;
4324                  }else{
4325                      run++;
4326                  }
4327             }
4328
4329             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4330         }else{
4331             break;
4332         }
4333     }
4334 #ifdef REFINE_STATS
4335 if(last_non_zero>0){
4336 STOP_TIMER("iterative search")
4337 }
4338 }
4339 #endif
4340
4341     return last_non_zero;
4342 }
4343
4344 int ff_dct_quantize_c(MpegEncContext *s,
4345                         int16_t *block, int n,
4346                         int qscale, int *overflow)
4347 {
4348     int i, j, level, last_non_zero, q, start_i;
4349     const int *qmat;
4350     const uint8_t *scantable= s->intra_scantable.scantable;
4351     int bias;
4352     int max=0;
4353     unsigned int threshold1, threshold2;
4354
4355     s->dsp.fdct (block);
4356
4357     if(s->dct_error_sum)
4358         s->denoise_dct(s, block);
4359
4360     if (s->mb_intra) {
4361         if (!s->h263_aic) {
4362             if (n < 4)
4363                 q = s->y_dc_scale;
4364             else
4365                 q = s->c_dc_scale;
4366             q = q << 3;
4367         } else
4368             /* For AIC we skip quant/dequant of INTRADC */
4369             q = 1 << 3;
4370
4371         /* note: block[0] is assumed to be positive */
4372         block[0] = (block[0] + (q >> 1)) / q;
4373         start_i = 1;
4374         last_non_zero = 0;
4375         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4376         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4377     } else {
4378         start_i = 0;
4379         last_non_zero = -1;
4380         qmat = s->q_inter_matrix[qscale];
4381         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4382     }
4383     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4384     threshold2= (threshold1<<1);
4385     for(i=63;i>=start_i;i--) {
4386         j = scantable[i];
4387         level = block[j] * qmat[j];
4388
4389         if(((unsigned)(level+threshold1))>threshold2){
4390             last_non_zero = i;
4391             break;
4392         }else{
4393             block[j]=0;
4394         }
4395     }
4396     for(i=start_i; i<=last_non_zero; i++) {
4397         j = scantable[i];
4398         level = block[j] * qmat[j];
4399
4400 //        if(   bias+level >= (1<<QMAT_SHIFT)
4401 //           || bias-level >= (1<<QMAT_SHIFT)){
4402         if(((unsigned)(level+threshold1))>threshold2){
4403             if(level>0){
4404                 level= (bias + level)>>QMAT_SHIFT;
4405                 block[j]= level;
4406             }else{
4407                 level= (bias - level)>>QMAT_SHIFT;
4408                 block[j]= -level;
4409             }
4410             max |=level;
4411         }else{
4412             block[j]=0;
4413         }
4414     }
4415     *overflow= s->max_qcoeff < max; //overflow might have happened
4416
4417     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4418     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4419         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4420
4421     return last_non_zero;
4422 }
4423
/* Byte offset of field x inside MpegEncContext, for AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flags shared by all options below: video parameter, encoding parameter.
 * Parenthesized so the expansion stays one operand in any expression. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4426 static const AVOption h263_options[] = {
4427     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4428     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4429     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4430     FF_MPV_COMMON_OPTS
4431     { NULL },
4432 };
4433
4434 static const AVClass h263_class = {
4435     .class_name = "H.263 encoder",
4436     .item_name  = av_default_item_name,
4437     .option     = h263_options,
4438     .version    = LIBAVUTIL_VERSION_INT,
4439 };
4440
4441 AVCodec ff_h263_encoder = {
4442     .name           = "h263",
4443     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4444     .type           = AVMEDIA_TYPE_VIDEO,
4445     .id             = AV_CODEC_ID_H263,
4446     .priv_data_size = sizeof(MpegEncContext),
4447     .init           = ff_MPV_encode_init,
4448     .encode2        = ff_MPV_encode_picture,
4449     .close          = ff_MPV_encode_end,
4450     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4451     .priv_class     = &h263_class,
4452 };
4453
4454 static const AVOption h263p_options[] = {
4455     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4456     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4457     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4458     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4459     FF_MPV_COMMON_OPTS
4460     { NULL },
4461 };
4462 static const AVClass h263p_class = {
4463     .class_name = "H.263p encoder",
4464     .item_name  = av_default_item_name,
4465     .option     = h263p_options,
4466     .version    = LIBAVUTIL_VERSION_INT,
4467 };
4468
4469 AVCodec ff_h263p_encoder = {
4470     .name           = "h263p",
4471     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4472     .type           = AVMEDIA_TYPE_VIDEO,
4473     .id             = AV_CODEC_ID_H263P,
4474     .priv_data_size = sizeof(MpegEncContext),
4475     .init           = ff_MPV_encode_init,
4476     .encode2        = ff_MPV_encode_picture,
4477     .close          = ff_MPV_encode_end,
4478     .capabilities   = CODEC_CAP_SLICE_THREADS,
4479     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4480     .priv_class     = &h263p_class,
4481 };
4482
4483 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4484
4485 AVCodec ff_msmpeg4v2_encoder = {
4486     .name           = "msmpeg4v2",
4487     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4488     .type           = AVMEDIA_TYPE_VIDEO,
4489     .id             = AV_CODEC_ID_MSMPEG4V2,
4490     .priv_data_size = sizeof(MpegEncContext),
4491     .init           = ff_MPV_encode_init,
4492     .encode2        = ff_MPV_encode_picture,
4493     .close          = ff_MPV_encode_end,
4494     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4495     .priv_class     = &msmpeg4v2_class,
4496 };
4497
4498 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4499
4500 AVCodec ff_msmpeg4v3_encoder = {
4501     .name           = "msmpeg4",
4502     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4503     .type           = AVMEDIA_TYPE_VIDEO,
4504     .id             = AV_CODEC_ID_MSMPEG4V3,
4505     .priv_data_size = sizeof(MpegEncContext),
4506     .init           = ff_MPV_encode_init,
4507     .encode2        = ff_MPV_encode_picture,
4508     .close          = ff_MPV_encode_end,
4509     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4510     .priv_class     = &msmpeg4v3_class,
4511 };
4512
4513 FF_MPV_GENERIC_CLASS(wmv1)
4514
4515 AVCodec ff_wmv1_encoder = {
4516     .name           = "wmv1",
4517     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4518     .type           = AVMEDIA_TYPE_VIDEO,
4519     .id             = AV_CODEC_ID_WMV1,
4520     .priv_data_size = sizeof(MpegEncContext),
4521     .init           = ff_MPV_encode_init,
4522     .encode2        = ff_MPV_encode_picture,
4523     .close          = ff_MPV_encode_end,
4524     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4525     .priv_class     = &wmv1_class,
4526 };