1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "dsputil.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mathops.h"
46 #include "mjpegenc.h"
47 #include "msmpeg4.h"
48 #include "faandct.h"
49 #include "thread.h"
50 #include "aandcttab.h"
51 #include "flv.h"
52 #include "mpeg4video.h"
53 #include "internal.h"
54 #include "bytestream.h"
55 #include <limits.h>
56 #include "sp5x.h"
57
58 static int encode_picture(MpegEncContext *s, int picture_number);
59 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
60 static int sse_mb(MpegEncContext *s);
61 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
62 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
63
64 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
65 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
66
67 const AVOption ff_mpv_generic_options[] = {
68     FF_MPV_COMMON_OPTS
69     { NULL },
70 };
71
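/* Build, for every qscale in [qmin, qmax], the reciprocal quantization
 * factors the encoder multiplies DCT coefficients with: qmat holds 32-bit
 * factors for the C/trellis quantizer and qmat16 holds 16-bit factor/bias
 * pairs for the SIMD quantizer.  The scaling depends on which forward DCT
 * is in use. */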
72 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
73                        uint16_t (*qmat16)[2][64],
74                        const uint16_t *quant_matrix,
75                        int bias, int qmin, int qmax, int intra)
76 {
77     int qscale;
78     int shift = 0;
79
80     for (qscale = qmin; qscale <= qmax; qscale++) {
81         int i;
82         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
83             dsp->fdct == ff_jpeg_fdct_islow_10 ||
84             dsp->fdct == ff_faandct) {
85             for (i = 0; i < 64; i++) {
86                 const int j = dsp->idct_permutation[i];
87                 /* 16 <= qscale * quant_matrix[i] <= 7905, so with
88                  * x = qscale * quant_matrix[i] the factor stays within
89                  * (1 << QMAT_SHIFT) / 7905 <= (1 << QMAT_SHIFT) / (x) <= (1 << QMAT_SHIFT) / 16.
90                  * Unlike the ff_fdct_ifast case below, no ff_aanscales
91                  * factor is needed; these DCTs are correctly scaled. */
92
93                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
94                                         (qscale * quant_matrix[j]));
95             }
96         } else if (dsp->fdct == ff_fdct_ifast) {
97             for (i = 0; i < 64; i++) {
98                 const int j = dsp->idct_permutation[i];
99                 /* 16 <= qscale * quant_matrix[i] <= 7905
100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
101                  *             19952 <=              x  <= 249205026
102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
103                  *           3444240 >= (1 << 36) / (x) >= 275 */
104
105                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
106                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
107             }
108         } else {
109             for (i = 0; i < 64; i++) {
110                 const int j = dsp->idct_permutation[i];
111                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
112                  * Assume x = qscale * quant_matrix[i]
113                  * So             16 <=              x  <= 7905
114                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
115                  * so          32768 >= (1 << 19) / (x) >= 67 */
116                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
117                                         (qscale * quant_matrix[j]));
118                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
119                 //                    (qscale * quant_matrix[i]);
120                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
121                                        (qscale * quant_matrix[j]);
122
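                /* Keep the 16-bit factor usable: 0 would zero every
                 * coefficient, and 128 * 256 (= 1 << 15) would overflow the
                 * signed 16-bit multiply used by the SIMD quantizer, so
                 * clamp it to 32767. */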
123                 if (qmat16[qscale][0][i] == 0 ||
124                     qmat16[qscale][0][i] == 128 * 256)
125                     qmat16[qscale][0][i] = 128 * 256 - 1;
126                 qmat16[qscale][1][i] =
127                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
128                                 qmat16[qscale][0][i]);
129             }
130         }
131
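        /* Check whether the largest possible DCT coefficient magnitude
         * (8191, rescaled for the ifast DCT) times any factor above could
         * overflow an int; a nonzero shift triggers the warning below. */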
132         for (i = intra; i < 64; i++) {
133             int64_t max = 8191;
134             if (dsp->fdct == ff_fdct_ifast) {
135                 max = (8191LL * ff_aanscales[i]) >> 14;
136             }
137             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
138                 shift++;
139             }
140         }
141     }
142     if (shift) {
143         av_log(NULL, AV_LOG_INFO,
144                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
145                QMAT_SHIFT - shift);
146     }
147 }
148
149 static inline void update_qscale(MpegEncContext *s)
150 {
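    /* lambda -> qscale: with FF_LAMBDA_SHIFT == 7 and FF_LAMBDA_SCALE == 128
     * this is a rounded lambda * 139 / 16384, i.e. roughly
     * lambda / FF_QP2LAMBDA; e.g. lambda = 20 * 118 = 2360 gives
     * (2360 * 139 + 8192) >> 14 = 20. */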
151     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
152                 (FF_LAMBDA_SHIFT + 7);
153     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
154
155     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
156                  FF_LAMBDA_SHIFT;
157 }
158
159 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
160 {
161     int i;
162
163     if (matrix) {
164         put_bits(pb, 1, 1);
165         for (i = 0; i < 64; i++) {
166             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
167         }
168     } else
169         put_bits(pb, 1, 0);
170 }
171
172 /**
173  * init s->current_picture.qscale_table from s->lambda_table
174  */
175 void ff_init_qscale_tab(MpegEncContext *s)
176 {
177     int8_t * const qscale_table = s->current_picture.qscale_table;
178     int i;
179
180     for (i = 0; i < s->mb_num; i++) {
181         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
182         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
183         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
184                                                   s->avctx->qmax);
185     }
186 }
187
188 static void update_duplicate_context_after_me(MpegEncContext *dst,
189                                               MpegEncContext *src)
190 {
191 #define COPY(a) dst->a= src->a
192     COPY(pict_type);
193     COPY(current_picture);
194     COPY(f_code);
195     COPY(b_code);
196     COPY(qscale);
197     COPY(lambda);
198     COPY(lambda2);
199     COPY(picture_in_gop_number);
200     COPY(gop_picture_number);
201     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
202     COPY(progressive_frame);    // FIXME don't set in encode_header
203     COPY(partitioned_frame);    // FIXME don't set in encode_header
204 #undef COPY
205 }
206
207 /**
208  * Set the given MpegEncContext to defaults for encoding.
209  * The changed fields will not depend upon the prior state of the MpegEncContext.
210  */
211 static void MPV_encode_defaults(MpegEncContext *s)
212 {
213     int i;
214     ff_MPV_common_defaults(s);
215
216     for (i = -16; i < 16; i++) {
217         default_fcode_tab[i + MAX_MV] = 1;
218     }
219     s->me.mv_penalty = default_mv_penalty;
220     s->fcode_tab     = default_fcode_tab;
221
222     s->input_picture_number  = 0;
223     s->picture_in_gop_number = 0;
224 }
225
226 av_cold int ff_dct_encode_init(MpegEncContext *s) {
227     if (ARCH_X86)
228         ff_dct_encode_init_x86(s);
229
230     if (CONFIG_H263_ENCODER)
231         ff_h263dsp_init(&s->h263dsp);
232     if (!s->dct_quantize)
233         s->dct_quantize = ff_dct_quantize_c;
234     if (!s->denoise_dct)
235         s->denoise_dct  = denoise_dct_c;
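    /* Keep the plain quantizer reachable as fast_dct_quantize; when trellis
     * quantization is requested, only the main dct_quantize pointer is
     * replaced by the trellis search. */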
236     s->fast_dct_quantize = s->dct_quantize;
237     if (s->avctx->trellis)
238         s->dct_quantize  = dct_quantize_trellis_c;
239
240     return 0;
241 }
242
243 /* init video encoder */
244 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
245 {
246     MpegEncContext *s = avctx->priv_data;
247     int i, ret;
248
249     MPV_encode_defaults(s);
250
251     switch (avctx->codec_id) {
252     case AV_CODEC_ID_MPEG2VIDEO:
253         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
254             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
255             av_log(avctx, AV_LOG_ERROR,
256                    "only YUV420 and YUV422 are supported\n");
257             return -1;
258         }
259         break;
260     case AV_CODEC_ID_MJPEG:
261     case AV_CODEC_ID_AMV:
262         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
263             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
264             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
265             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
266               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
267               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
268              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
269             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
270             return -1;
271         }
272         break;
273     default:
274         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
275             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
276             return -1;
277         }
278     }
279
280     switch (avctx->pix_fmt) {
281     case AV_PIX_FMT_YUVJ444P:
282     case AV_PIX_FMT_YUV444P:
283         s->chroma_format = CHROMA_444;
284         break;
285     case AV_PIX_FMT_YUVJ422P:
286     case AV_PIX_FMT_YUV422P:
287         s->chroma_format = CHROMA_422;
288         break;
289     case AV_PIX_FMT_YUVJ420P:
290     case AV_PIX_FMT_YUV420P:
291     default:
292         s->chroma_format = CHROMA_420;
293         break;
294     }
295
296     s->bit_rate = avctx->bit_rate;
297     s->width    = avctx->width;
298     s->height   = avctx->height;
299     if (avctx->gop_size > 600 &&
300         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
301         av_log(avctx, AV_LOG_WARNING,
302                "keyframe interval too large, reducing it from %d to %d\n",
303                avctx->gop_size, 600);
304         avctx->gop_size = 600;
305     }
306     s->gop_size     = avctx->gop_size;
307     s->avctx        = avctx;
308     s->flags        = avctx->flags;
309     s->flags2       = avctx->flags2;
310     if (avctx->max_b_frames > MAX_B_FRAMES) {
311         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
312                "is %d.\n", MAX_B_FRAMES);
313         avctx->max_b_frames = MAX_B_FRAMES;
314     }
315     s->max_b_frames = avctx->max_b_frames;
316     s->codec_id     = avctx->codec->id;
317     s->strict_std_compliance = avctx->strict_std_compliance;
318     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
319     s->mpeg_quant         = avctx->mpeg_quant;
320     s->rtp_mode           = !!avctx->rtp_payload_size;
321     s->intra_dc_precision = avctx->intra_dc_precision;
322     s->user_specified_pts = AV_NOPTS_VALUE;
323
324     if (s->gop_size <= 1) {
325         s->intra_only = 1;
326         s->gop_size   = 12;
327     } else {
328         s->intra_only = 0;
329     }
330
331     s->me_method = avctx->me_method;
332
333     /* Fixed QSCALE */
334     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
335
336     s->adaptive_quant = (s->avctx->lumi_masking ||
337                          s->avctx->dark_masking ||
338                          s->avctx->temporal_cplx_masking ||
339                          s->avctx->spatial_cplx_masking  ||
340                          s->avctx->p_masking      ||
341                          s->avctx->border_masking ||
342                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
343                         !s->fixed_qscale;
344
345     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
346
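    /* A maximum bitrate was given but no VBV buffer size: pick one
     * heuristically.  The values below are in units of 16384 bits and are
     * interpolated linearly between common profile/level buffer sizes
     * (112 * 16384 bits matches the MPEG-2 MP@ML VBV buffer size). */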
347     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
348         switch(avctx->codec_id) {
349         case AV_CODEC_ID_MPEG1VIDEO:
350         case AV_CODEC_ID_MPEG2VIDEO:
351             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
352             break;
353         case AV_CODEC_ID_MPEG4:
354         case AV_CODEC_ID_MSMPEG4V1:
355         case AV_CODEC_ID_MSMPEG4V2:
356         case AV_CODEC_ID_MSMPEG4V3:
357             if       (avctx->rc_max_rate >= 15000000) {
358                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
359             } else if(avctx->rc_max_rate >=  2000000) {
360                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
361             } else if(avctx->rc_max_rate >=   384000) {
362                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
363             } else
364                 avctx->rc_buffer_size = 40;
365             avctx->rc_buffer_size *= 16384;
366             break;
367         }
368         if (avctx->rc_buffer_size) {
369             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
370         }
371     }
372
373     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
374         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
375         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
376             return -1;
377     }
378
379     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
380         av_log(avctx, AV_LOG_INFO,
381                "Warning: min_rate > 0 but min_rate != max_rate is not recommended!\n");
382     }
383
384     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
385         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
386         return -1;
387     }
388
389     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
390         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
391         return -1;
392     }
393
394     if (avctx->rc_max_rate &&
395         avctx->rc_max_rate == avctx->bit_rate &&
396         avctx->rc_max_rate != avctx->rc_min_rate) {
397         av_log(avctx, AV_LOG_INFO,
398                "impossible bitrate constraints, this will fail\n");
399     }
400
401     if (avctx->rc_buffer_size &&
402         avctx->bit_rate * (int64_t)avctx->time_base.num >
403             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
404         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
405         return -1;
406     }
407
408     if (!s->fixed_qscale &&
409         avctx->bit_rate * av_q2d(avctx->time_base) >
410             avctx->bit_rate_tolerance) {
411         av_log(avctx, AV_LOG_ERROR,
412                "bitrate tolerance %d too small for bitrate %d\n", avctx->bit_rate_tolerance, avctx->bit_rate);
413         return -1;
414     }
415
416     if (s->avctx->rc_max_rate &&
417         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
418         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
419          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
420         90000LL * (avctx->rc_buffer_size - 1) >
421             s->avctx->rc_max_rate * 0xFFFFLL) {
422         av_log(avctx, AV_LOG_INFO,
423                "Warning: vbv_delay will be set to 0xFFFF (=VBR) as the "
424                "specified vbv buffer is too large for the given bitrate!\n");
425     }
426
427     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
428         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
429         s->codec_id != AV_CODEC_ID_FLV1) {
430         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
431         return -1;
432     }
433
434     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
435         av_log(avctx, AV_LOG_ERROR,
436                "OBMC is only supported with simple mb decision\n");
437         return -1;
438     }
439
440     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
441         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
442         return -1;
443     }
444
445     if (s->max_b_frames                    &&
446         s->codec_id != AV_CODEC_ID_MPEG4      &&
447         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
448         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
449         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
450         return -1;
451     }
452     if (s->max_b_frames < 0) {
453         av_log(avctx, AV_LOG_ERROR,
454                "max_b_frames must be 0 or positive for mpegvideo-based encoders\n");
455         return -1;
456     }
457
458     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
459          s->codec_id == AV_CODEC_ID_H263  ||
460          s->codec_id == AV_CODEC_ID_H263P) &&
461         (avctx->sample_aspect_ratio.num > 255 ||
462          avctx->sample_aspect_ratio.den > 255)) {
463         av_log(avctx, AV_LOG_WARNING,
464                "Invalid pixel aspect ratio %i/%i, limit is 255/255, reducing\n",
465                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
466         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
467                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
468     }
469
470     if ((s->codec_id == AV_CODEC_ID_H263  ||
471          s->codec_id == AV_CODEC_ID_H263P) &&
472         (avctx->width  > 2048 ||
473          avctx->height > 1152 )) {
474         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
475         return -1;
476     }
477     if ((s->codec_id == AV_CODEC_ID_H263  ||
478          s->codec_id == AV_CODEC_ID_H263P) &&
479         ((avctx->width &3) ||
480          (avctx->height&3) )) {
481         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
482         return -1;
483     }
484
485     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
486         (avctx->width  > 4095 ||
487          avctx->height > 4095 )) {
488         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
489         return -1;
490     }
491
492     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
493         (avctx->width  > 16383 ||
494          avctx->height > 16383 )) {
495         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
496         return -1;
497     }
498
499     if (s->codec_id == AV_CODEC_ID_RV10 &&
500         (avctx->width &15 ||
501          avctx->height&15 )) {
502         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
503         return AVERROR(EINVAL);
504     }
505
506     if (s->codec_id == AV_CODEC_ID_RV20 &&
507         (avctx->width &3 ||
508          avctx->height&3 )) {
509         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
510         return AVERROR(EINVAL);
511     }
512
513     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
514          s->codec_id == AV_CODEC_ID_WMV2) &&
515          avctx->width & 1) {
516          av_log(avctx, AV_LOG_ERROR, "width must be a multiple of 2\n");
517          return -1;
518     }
519
520     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
521         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
522         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
523         return -1;
524     }
525
526     // FIXME mpeg2 uses that too
527     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
528                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
529         av_log(avctx, AV_LOG_ERROR,
530                "mpeg2 style quantization not supported by codec\n");
531         return -1;
532     }
533
534     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
535         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
536         return -1;
537     }
538
539     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
540         s->avctx->mb_decision != FF_MB_DECISION_RD) {
541         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
542         return -1;
543     }
544
545     if (s->avctx->scenechange_threshold < 1000000000 &&
546         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
547         av_log(avctx, AV_LOG_ERROR,
548                "closed GOP with scene change detection is not supported yet, "
549                "set threshold to 1000000000\n");
550         return -1;
551     }
552
553     if (s->flags & CODEC_FLAG_LOW_DELAY) {
554         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
555             av_log(avctx, AV_LOG_ERROR,
556                   "low delay forcing is only available for mpeg2\n");
557             return -1;
558         }
559         if (s->max_b_frames != 0) {
560             av_log(avctx, AV_LOG_ERROR,
561                    "b frames cannot be used with low delay\n");
562             return -1;
563         }
564     }
565
566     if (s->q_scale_type == 1) {
567         if (avctx->qmax > 12) {
568             av_log(avctx, AV_LOG_ERROR,
569                "non-linear quant currently only supports qmax <= 12\n");
570             return -1;
571         }
572     }
573
574     if (s->avctx->thread_count > 1         &&
575         s->codec_id != AV_CODEC_ID_MPEG4      &&
576         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
577         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
578         s->codec_id != AV_CODEC_ID_MJPEG      &&
579         (s->codec_id != AV_CODEC_ID_H263P)) {
580         av_log(avctx, AV_LOG_ERROR,
581                "multi threaded encoding not supported by codec\n");
582         return -1;
583     }
584
585     if (s->avctx->thread_count < 1) {
586         av_log(avctx, AV_LOG_ERROR,
587                "automatic thread number detection not supported by codec, "
588                "patch welcome\n");
589         return -1;
590     }
591
592     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
593         s->rtp_mode = 1;
594
595     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
596         s->h263_slice_structured = 1;
597
598     if (!avctx->time_base.den || !avctx->time_base.num) {
599         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
600         return -1;
601     }
602
603     i = (INT_MAX / 2 + 128) >> 8;
604     if (avctx->mb_threshold >= i) {
605         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
606                i - 1);
607         return -1;
608     }
609
610     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
611         av_log(avctx, AV_LOG_INFO,
612                "notice: b_frame_strategy only affects the first pass\n");
613         avctx->b_frame_strategy = 0;
614     }
615
616     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
617     if (i > 1) {
618         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
619         avctx->time_base.den /= i;
620         avctx->time_base.num /= i;
621         //return -1;
622     }
623
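    /* Quantizer rounding offsets in QUANT_BIAS_SHIFT fixed point:
     * 3 << (QUANT_BIAS_SHIFT - 3) is a +3/8 offset (MPEG/MJPEG-style intra
     * rounding), -(1 << (QUANT_BIAS_SHIFT - 2)) is a -1/4 offset, i.e. a
     * larger dead zone for H.263-style inter blocks. */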
624     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
625         // (a + x * 3 / 8) / x
626         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
627         s->inter_quant_bias = 0;
628     } else {
629         s->intra_quant_bias = 0;
630         // (a - x / 4) / x
631         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
632     }
633
634     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
635         av_log(avctx, AV_LOG_ERROR, "qmin and/or qmax are invalid; 0 < qmin <= qmax is required\n");
636         return AVERROR(EINVAL);
637     }
638
639     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
640         s->intra_quant_bias = avctx->intra_quant_bias;
641     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
642         s->inter_quant_bias = avctx->inter_quant_bias;
643
644     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
645
646     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
647         s->avctx->time_base.den > (1 << 16) - 1) {
648         av_log(avctx, AV_LOG_ERROR,
649                "timebase %d/%d not supported by MPEG 4 standard, "
650                "the maximum admitted value for the timebase denominator "
651                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
652                (1 << 16) - 1);
653         return -1;
654     }
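    /* Number of bits needed to code values up to time_base.den - 1; MPEG-4
     * stores the per-frame time increment with this many bits. */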
655     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
656
657     switch (avctx->codec->id) {
658     case AV_CODEC_ID_MPEG1VIDEO:
659         s->out_format = FMT_MPEG1;
660         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
661         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
662         break;
663     case AV_CODEC_ID_MPEG2VIDEO:
664         s->out_format = FMT_MPEG1;
665         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
666         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
667         s->rtp_mode   = 1;
668         break;
669     case AV_CODEC_ID_MJPEG:
670     case AV_CODEC_ID_AMV:
671         s->out_format = FMT_MJPEG;
672         s->intra_only = 1; /* force intra only for jpeg */
673         if (!CONFIG_MJPEG_ENCODER ||
674             ff_mjpeg_encode_init(s) < 0)
675             return -1;
676         avctx->delay = 0;
677         s->low_delay = 1;
678         break;
679     case AV_CODEC_ID_H261:
680         if (!CONFIG_H261_ENCODER)
681             return -1;
682         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
683             av_log(avctx, AV_LOG_ERROR,
684                    "The specified picture size of %dx%d is not valid for the "
685                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
686                     s->width, s->height);
687             return -1;
688         }
689         s->out_format = FMT_H261;
690         avctx->delay  = 0;
691         s->low_delay  = 1;
692         break;
693     case AV_CODEC_ID_H263:
694         if (!CONFIG_H263_ENCODER)
695             return -1;
696         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
697                              s->width, s->height) == 8) {
698             av_log(avctx, AV_LOG_ERROR,
699                    "The specified picture size of %dx%d is not valid for "
700                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
701                    "352x288, 704x576, and 1408x1152. "
702                    "Try H.263+.\n", s->width, s->height);
703             return -1;
704         }
705         s->out_format = FMT_H263;
706         avctx->delay  = 0;
707         s->low_delay  = 1;
708         break;
709     case AV_CODEC_ID_H263P:
710         s->out_format = FMT_H263;
711         s->h263_plus  = 1;
712         /* Fx */
713         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
714         s->modified_quant  = s->h263_aic;
715         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
716         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
717
718         /* /Fx */
719         /* These are just to be sure */
720         avctx->delay = 0;
721         s->low_delay = 1;
722         break;
723     case AV_CODEC_ID_FLV1:
724         s->out_format      = FMT_H263;
725         s->h263_flv        = 2; /* format = 1; 11-bit codes */
726         s->unrestricted_mv = 1;
727         s->rtp_mode  = 0; /* don't allow GOB */
728         avctx->delay = 0;
729         s->low_delay = 1;
730         break;
731     case AV_CODEC_ID_RV10:
732         s->out_format = FMT_H263;
733         avctx->delay  = 0;
734         s->low_delay  = 1;
735         break;
736     case AV_CODEC_ID_RV20:
737         s->out_format      = FMT_H263;
738         avctx->delay       = 0;
739         s->low_delay       = 1;
740         s->modified_quant  = 1;
741         s->h263_aic        = 1;
742         s->h263_plus       = 1;
743         s->loop_filter     = 1;
744         s->unrestricted_mv = 0;
745         break;
746     case AV_CODEC_ID_MPEG4:
747         s->out_format      = FMT_H263;
748         s->h263_pred       = 1;
749         s->unrestricted_mv = 1;
750         s->low_delay       = s->max_b_frames ? 0 : 1;
751         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
752         break;
753     case AV_CODEC_ID_MSMPEG4V2:
754         s->out_format      = FMT_H263;
755         s->h263_pred       = 1;
756         s->unrestricted_mv = 1;
757         s->msmpeg4_version = 2;
758         avctx->delay       = 0;
759         s->low_delay       = 1;
760         break;
761     case AV_CODEC_ID_MSMPEG4V3:
762         s->out_format        = FMT_H263;
763         s->h263_pred         = 1;
764         s->unrestricted_mv   = 1;
765         s->msmpeg4_version   = 3;
766         s->flipflop_rounding = 1;
767         avctx->delay         = 0;
768         s->low_delay         = 1;
769         break;
770     case AV_CODEC_ID_WMV1:
771         s->out_format        = FMT_H263;
772         s->h263_pred         = 1;
773         s->unrestricted_mv   = 1;
774         s->msmpeg4_version   = 4;
775         s->flipflop_rounding = 1;
776         avctx->delay         = 0;
777         s->low_delay         = 1;
778         break;
779     case AV_CODEC_ID_WMV2:
780         s->out_format        = FMT_H263;
781         s->h263_pred         = 1;
782         s->unrestricted_mv   = 1;
783         s->msmpeg4_version   = 5;
784         s->flipflop_rounding = 1;
785         avctx->delay         = 0;
786         s->low_delay         = 1;
787         break;
788     default:
789         return -1;
790     }
791
792     avctx->has_b_frames = !s->low_delay;
793
794     s->encoding = 1;
795
796     s->progressive_frame    =
797     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
798                                                 CODEC_FLAG_INTERLACED_ME) ||
799                                 s->alternate_scan);
800
801     /* init */
802     if (ff_MPV_common_init(s) < 0)
803         return -1;
804
805     s->avctx->coded_frame = &s->current_picture.f;
806
807     if (s->msmpeg4_version) {
808         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
809                           2 * 2 * (MAX_LEVEL + 1) *
810                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
811     }
812     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
813
814     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
815     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
816     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
817     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
818     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
819     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
820     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
821                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
822     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
823                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
824
825     if (s->avctx->noise_reduction) {
826         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
827                           2 * 64 * sizeof(uint16_t), fail);
828     }
829
830     ff_dct_encode_init(s);
831
832     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
833         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
834
835     s->quant_precision = 5;
836
837     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
838     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
839
840     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
841         ff_h261_encode_init(s);
842     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
843         ff_h263_encode_init(s);
844     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
845         ff_msmpeg4_encode_init(s);
846     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
847         && s->out_format == FMT_MPEG1)
848         ff_mpeg1_encode_init(s);
849
850     /* init q matrix */
851     for (i = 0; i < 64; i++) {
852         int j = s->dsp.idct_permutation[i];
853         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
854             s->mpeg_quant) {
855             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
856             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
857         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
858             s->intra_matrix[j] =
859             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
860         } else {
861             /* mpeg1/2 */
862             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
863             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
864         }
865         if (s->avctx->intra_matrix)
866             s->intra_matrix[j] = s->avctx->intra_matrix[i];
867         if (s->avctx->inter_matrix)
868             s->inter_matrix[j] = s->avctx->inter_matrix[i];
869     }
870
871     /* precompute matrix */
872     /* for mjpeg, we do include qscale in the matrix */
873     if (s->out_format != FMT_MJPEG) {
874         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
875                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
876                           31, 1);
877         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
878                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
879                           31, 0);
880     }
881
882     if (ff_rate_control_init(s) < 0)
883         return -1;
884
885 #if FF_API_ERROR_RATE
886     FF_DISABLE_DEPRECATION_WARNINGS
887     if (avctx->error_rate)
888         s->error_rate = avctx->error_rate;
889     FF_ENABLE_DEPRECATION_WARNINGS;
890 #endif
891
892     if (avctx->b_frame_strategy == 2) {
893         for (i = 0; i < s->max_b_frames + 2; i++) {
894             s->tmp_frames[i] = av_frame_alloc();
895             if (!s->tmp_frames[i])
896                 return AVERROR(ENOMEM);
897
898             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
899             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
900             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
901
902             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
903             if (ret < 0)
904                 return ret;
905         }
906     }
907
908     return 0;
909 fail:
910     ff_MPV_encode_end(avctx);
911     return AVERROR_UNKNOWN;
912 }
913
914 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
915 {
916     MpegEncContext *s = avctx->priv_data;
917     int i;
918
919     ff_rate_control_uninit(s);
920
921     ff_MPV_common_end(s);
922     if (CONFIG_MJPEG_ENCODER &&
923         s->out_format == FMT_MJPEG)
924         ff_mjpeg_encode_close(s);
925
926     av_freep(&avctx->extradata);
927
928     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
929         av_frame_free(&s->tmp_frames[i]);
930
931     ff_free_picture_tables(&s->new_picture);
932     ff_mpeg_unref_picture(s, &s->new_picture);
933
934     av_freep(&s->avctx->stats_out);
935     av_freep(&s->ac_stats);
936
937     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
938     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
939     s->q_chroma_intra_matrix=   NULL;
940     s->q_chroma_intra_matrix16= NULL;
941     av_freep(&s->q_intra_matrix);
942     av_freep(&s->q_inter_matrix);
943     av_freep(&s->q_intra_matrix16);
944     av_freep(&s->q_inter_matrix16);
945     av_freep(&s->input_picture);
946     av_freep(&s->reordered_input_picture);
947     av_freep(&s->dct_offset);
948
949     return 0;
950 }
951
952 static int get_sae(uint8_t *src, int ref, int stride)
953 {
954     int x,y;
955     int acc = 0;
956
957     for (y = 0; y < 16; y++) {
958         for (x = 0; x < 16; x++) {
959             acc += FFABS(src[x + y * stride] - ref);
960         }
961     }
962
963     return acc;
964 }
965
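/* Rough intra/scene-change measure used by b_frame_strategy 1: for every
 * 16x16 block compare the inter cost (SAD against the previous frame) with
 * an intra estimate (absolute deviation from the block mean) and count the
 * blocks where intra looks clearly cheaper. */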
966 static int get_intra_count(MpegEncContext *s, uint8_t *src,
967                            uint8_t *ref, int stride)
968 {
969     int x, y, w, h;
970     int acc = 0;
971
972     w = s->width  & ~15;
973     h = s->height & ~15;
974
975     for (y = 0; y < h; y += 16) {
976         for (x = 0; x < w; x += 16) {
977             int offset = x + y * stride;
978             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
979                                      16);
980             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
981             int sae  = get_sae(src + offset, mean, stride);
982
983             acc += sae + 500 < sad;
984         }
985     }
986     return acc;
987 }
988
989
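/* Queue one input frame for encoding.  If the caller's buffers already match
 * the encoder's strides and alignment, the frame is referenced directly;
 * otherwise it is copied (and edge-padded if needed) into an internal
 * picture.  Missing pts values are guessed from the previous frame. */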
990 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
991 {
992     Picture *pic = NULL;
993     int64_t pts;
994     int i, display_picture_number = 0, ret;
995     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
996                                                  (s->low_delay ? 0 : 1);
997     int direct = 1;
998
999     if (pic_arg) {
1000         pts = pic_arg->pts;
1001         display_picture_number = s->input_picture_number++;
1002
1003         if (pts != AV_NOPTS_VALUE) {
1004             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1005                 int64_t last = s->user_specified_pts;
1006
1007                 if (pts <= last) {
1008                     av_log(s->avctx, AV_LOG_ERROR,
1009                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1010                            pts, last);
1011                     return AVERROR(EINVAL);
1012                 }
1013
1014                 if (!s->low_delay && display_picture_number == 1)
1015                     s->dts_delta = pts - last;
1016             }
1017             s->user_specified_pts = pts;
1018         } else {
1019             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1020                 s->user_specified_pts =
1021                 pts = s->user_specified_pts + 1;
1022                 av_log(s->avctx, AV_LOG_INFO,
1023                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1024                        pts);
1025             } else {
1026                 pts = display_picture_number;
1027             }
1028         }
1029     }
1030
1031     if (pic_arg) {
1032         if (!pic_arg->buf[0])
1033             direct = 0;
1034         if (pic_arg->linesize[0] != s->linesize)
1035             direct = 0;
1036         if (pic_arg->linesize[1] != s->uvlinesize)
1037             direct = 0;
1038         if (pic_arg->linesize[2] != s->uvlinesize)
1039             direct = 0;
1040         if ((s->width & 15) || (s->height & 15))
1041             direct = 0;
1042
1043         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
1044                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1045
1046         if (direct) {
1047             i = ff_find_unused_picture(s, 1);
1048             if (i < 0)
1049                 return i;
1050
1051             pic = &s->picture[i];
1052             pic->reference = 3;
1053
1054             if ((ret = av_frame_ref(&pic->f, pic_arg)) < 0)
1055                 return ret;
1056             if (ff_alloc_picture(s, pic, 1) < 0) {
1057                 return -1;
1058             }
1059         } else {
1060             i = ff_find_unused_picture(s, 0);
1061             if (i < 0)
1062                 return i;
1063
1064             pic = &s->picture[i];
1065             pic->reference = 3;
1066
1067             if (ff_alloc_picture(s, pic, 0) < 0) {
1068                 return -1;
1069             }
1070
1071             if (pic->f.data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1072                 pic->f.data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1073                 pic->f.data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1074                 // empty
1075             } else {
1076                 int h_chroma_shift, v_chroma_shift;
1077                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1078                                                  &h_chroma_shift,
1079                                                  &v_chroma_shift);
1080
1081                 for (i = 0; i < 3; i++) {
1082                     int src_stride = pic_arg->linesize[i];
1083                     int dst_stride = i ? s->uvlinesize : s->linesize;
1084                     int h_shift = i ? h_chroma_shift : 0;
1085                     int v_shift = i ? v_chroma_shift : 0;
1086                     int w = s->width  >> h_shift;
1087                     int h = s->height >> v_shift;
1088                     uint8_t *src = pic_arg->data[i];
1089                     uint8_t *dst = pic->f.data[i];
1090
1091                     if (s->codec_id == AV_CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1092                         h = ((s->height + 15)/16*16) >> v_shift;
1093                     }
1094
1095                     if (!s->avctx->rc_buffer_size)
1096                         dst += INPLACE_OFFSET;
1097
1098                     if (src_stride == dst_stride)
1099                         memcpy(dst, src, src_stride * h);
1100                     else {
1101                         int h2 = h;
1102                         uint8_t *dst2 = dst;
1103                         while (h2--) {
1104                             memcpy(dst2, src, w);
1105                             dst2 += dst_stride;
1106                             src += src_stride;
1107                         }
1108                     }
1109                     if ((s->width & 15) || (s->height & 15)) {
1110                         s->dsp.draw_edges(dst, dst_stride,
1111                                           w, h,
1112                                           16>>h_shift,
1113                                           16>>v_shift,
1114                                           EDGE_BOTTOM);
1115                     }
1116                 }
1117             }
1118         }
1119         ret = av_frame_copy_props(&pic->f, pic_arg);
1120         if (ret < 0)
1121             return ret;
1122
1123         pic->f.display_picture_number = display_picture_number;
1124         pic->f.pts = pts; // we set this here to avoid modifying pic_arg
1125     }
1126
1127     /* shift buffer entries */
1128     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1129         s->input_picture[i - 1] = s->input_picture[i];
1130
1131     s->input_picture[encoding_delay] = (Picture*) pic;
1132
1133     return 0;
1134 }
1135
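/* Decide whether the candidate frame p may be dropped: compare it with the
 * last encoded reference per 8x8 block using frame_skip_cmp, accumulate the
 * error with the norm selected by frame_skip_exp, and return 1 if the score
 * is below frame_skip_threshold or below frame_skip_factor scaled by lambda. */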
1136 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1137 {
1138     int x, y, plane;
1139     int score = 0;
1140     int64_t score64 = 0;
1141
1142     for (plane = 0; plane < 3; plane++) {
1143         const int stride = p->f.linesize[plane];
1144         const int bw = plane ? 1 : 2;
1145         for (y = 0; y < s->mb_height * bw; y++) {
1146             for (x = 0; x < s->mb_width * bw; x++) {
1147                 int off = p->shared ? 0 : 16;
1148                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1149                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1150                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1151
1152                 switch (FFABS(s->avctx->frame_skip_exp)) {
1153                 case 0: score    =  FFMAX(score, v);          break;
1154                 case 1: score   += FFABS(v);                  break;
1155                 case 2: score64 += v * (int64_t)v;                       break;
1156                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1157                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1158                 }
1159             }
1160         }
1161     }
1162     emms_c();
1163
1164     if (score)
1165         score64 = score;
1166     if (s->avctx->frame_skip_exp < 0)
1167         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1168                       -1.0/s->avctx->frame_skip_exp);
1169
1170     if (score64 < s->avctx->frame_skip_threshold)
1171         return 1;
1172     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1173         return 1;
1174     return 0;
1175 }
1176
1177 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1178 {
1179     AVPacket pkt = { 0 };
1180     int ret, got_output;
1181
1182     av_init_packet(&pkt);
1183     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1184     if (ret < 0)
1185         return ret;
1186
1187     ret = pkt.size;
1188     av_free_packet(&pkt);
1189     return ret;
1190 }
1191
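/* b_frame_strategy 2: re-encode the queued input frames at reduced
 * resolution (brd_scale) once for every possible number of consecutive
 * B-frames and return the count that gives the lowest rate-distortion
 * score (coded bits weighted by lambda2 plus the reconstruction error). */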
1192 static int estimate_best_b_count(MpegEncContext *s)
1193 {
1194     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1195     AVCodecContext *c = avcodec_alloc_context3(NULL);
1196     const int scale = s->avctx->brd_scale;
1197     int i, j, out_size, p_lambda, b_lambda, lambda2;
1198     int64_t best_rd  = INT64_MAX;
1199     int best_b_count = -1;
1200
1201     av_assert0(scale >= 0 && scale <= 3);
1202
1203     //emms_c();
1204     //s->next_picture_ptr->quality;
1205     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1206     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1207     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1208     if (!b_lambda) // FIXME we should do this somewhere else
1209         b_lambda = p_lambda;
1210     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1211                FF_LAMBDA_SHIFT;
1212
1213     c->width        = s->width  >> scale;
1214     c->height       = s->height >> scale;
1215     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1216                       CODEC_FLAG_INPUT_PRESERVED;
1217     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1218     c->mb_decision  = s->avctx->mb_decision;
1219     c->me_cmp       = s->avctx->me_cmp;
1220     c->mb_cmp       = s->avctx->mb_cmp;
1221     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1222     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1223     c->time_base    = s->avctx->time_base;
1224     c->max_b_frames = s->max_b_frames;
1225
1226     if (avcodec_open2(c, codec, NULL) < 0)
1227         return -1;
1228
1229     for (i = 0; i < s->max_b_frames + 2; i++) {
1230         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1231                                                 s->next_picture_ptr;
1232
1233         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1234             pre_input = *pre_input_ptr;
1235
1236             if (!pre_input.shared && i) {
1237                 pre_input.f.data[0] += INPLACE_OFFSET;
1238                 pre_input.f.data[1] += INPLACE_OFFSET;
1239                 pre_input.f.data[2] += INPLACE_OFFSET;
1240             }
1241
1242             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1243                                  pre_input.f.data[0], pre_input.f.linesize[0],
1244                                  c->width,      c->height);
1245             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1246                                  pre_input.f.data[1], pre_input.f.linesize[1],
1247                                  c->width >> 1, c->height >> 1);
1248             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1249                                  pre_input.f.data[2], pre_input.f.linesize[2],
1250                                  c->width >> 1, c->height >> 1);
1251         }
1252     }
1253
1254     for (j = 0; j < s->max_b_frames + 1; j++) {
1255         int64_t rd = 0;
1256
1257         if (!s->input_picture[j])
1258             break;
1259
1260         c->error[0] = c->error[1] = c->error[2] = 0;
1261
1262         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1263         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1264
1265         out_size = encode_frame(c, s->tmp_frames[0]);
1266
1267         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1268
1269         for (i = 0; i < s->max_b_frames + 1; i++) {
1270             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1271
1272             s->tmp_frames[i + 1]->pict_type = is_p ?
1273                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1274             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1275
1276             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1277
1278             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1279         }
1280
1281         /* get the delayed frames */
1282         while (out_size) {
1283             out_size = encode_frame(c, NULL);
1284             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1285         }
1286
1287         rd += c->error[0] + c->error[1] + c->error[2];
1288
1289         if (rd < best_rd) {
1290             best_rd = rd;
1291             best_b_count = j;
1292         }
1293     }
1294
1295     avcodec_close(c);
1296     av_freep(&c);
1297
1298     return best_b_count;
1299 }
1300
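/* Pick the next picture to encode: optionally drop it via skip_check(),
 * decide how many queued frames become B-frames (b_frame_strategy 0/1/2),
 * force I-frames at GOP boundaries and set up new_picture /
 * current_picture for the actual encoding pass. */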
1301 static int select_input_picture(MpegEncContext *s)
1302 {
1303     int i, ret;
1304
1305     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1306         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1307     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1308
1309     /* set next picture type & ordering */
1310     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1311         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1312             if (s->picture_in_gop_number < s->gop_size &&
1313                 s->next_picture_ptr &&
1314                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1315                 // FIXME check that the gop check above is +-1 correct
1316                 av_frame_unref(&s->input_picture[0]->f);
1317
1318                 ff_vbv_update(s, 0);
1319
1320                 goto no_output_pic;
1321             }
1322         }
1323
1324         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1325             s->next_picture_ptr == NULL || s->intra_only) {
1326             s->reordered_input_picture[0] = s->input_picture[0];
1327             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1328             s->reordered_input_picture[0]->f.coded_picture_number =
1329                 s->coded_picture_number++;
1330         } else {
1331             int b_frames;
1332
1333             if (s->flags & CODEC_FLAG_PASS2) {
1334                 for (i = 0; i < s->max_b_frames + 1; i++) {
1335                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1336
1337                     if (pict_num >= s->rc_context.num_entries)
1338                         break;
1339                     if (!s->input_picture[i]) {
1340                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1341                         break;
1342                     }
1343
1344                     s->input_picture[i]->f.pict_type =
1345                         s->rc_context.entry[pict_num].new_pict_type;
1346                 }
1347             }
1348
1349             if (s->avctx->b_frame_strategy == 0) {
1350                 b_frames = s->max_b_frames;
1351                 while (b_frames && !s->input_picture[b_frames])
1352                     b_frames--;
1353             } else if (s->avctx->b_frame_strategy == 1) {
1354                 for (i = 1; i < s->max_b_frames + 1; i++) {
1355                     if (s->input_picture[i] &&
1356                         s->input_picture[i]->b_frame_score == 0) {
1357                         s->input_picture[i]->b_frame_score =
1358                             get_intra_count(s,
1359                                             s->input_picture[i    ]->f.data[0],
1360                                             s->input_picture[i - 1]->f.data[0],
1361                                             s->linesize) + 1;
1362                     }
1363                 }
1364                 for (i = 0; i < s->max_b_frames + 1; i++) {
1365                     if (s->input_picture[i] == NULL ||
1366                         s->input_picture[i]->b_frame_score - 1 >
1367                             s->mb_num / s->avctx->b_sensitivity)
1368                         break;
1369                 }
1370
1371                 b_frames = FFMAX(0, i - 1);
1372
1373                 /* reset scores */
1374                 for (i = 0; i < b_frames + 1; i++) {
1375                     s->input_picture[i]->b_frame_score = 0;
1376                 }
1377             } else if (s->avctx->b_frame_strategy == 2) {
1378                 b_frames = estimate_best_b_count(s);
1379             } else {
1380                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1381                 b_frames = 0;
1382             }
1383
1384             emms_c();
1385
1386             for (i = b_frames - 1; i >= 0; i--) {
1387                 int type = s->input_picture[i]->f.pict_type;
1388                 if (type && type != AV_PICTURE_TYPE_B)
1389                     b_frames = i;
1390             }
1391             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1392                 b_frames == s->max_b_frames) {
1393                 av_log(s->avctx, AV_LOG_ERROR,
1394                        "warning, too many b frames in a row\n");
1395             }
1396
1397             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1398                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1399                     s->gop_size > s->picture_in_gop_number) {
1400                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1401                 } else {
1402                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1403                         b_frames = 0;
1404                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1405                 }
1406             }
1407
1408             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1409                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1410                 b_frames--;
1411
1412             s->reordered_input_picture[0] = s->input_picture[b_frames];
1413             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1414                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1415             s->reordered_input_picture[0]->f.coded_picture_number =
1416                 s->coded_picture_number++;
1417             for (i = 0; i < b_frames; i++) {
1418                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1419                 s->reordered_input_picture[i + 1]->f.pict_type =
1420                     AV_PICTURE_TYPE_B;
1421                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1422                     s->coded_picture_number++;
1423             }
1424         }
1425     }
1426 no_output_pic:
1427     if (s->reordered_input_picture[0]) {
1428         s->reordered_input_picture[0]->reference =
1429            s->reordered_input_picture[0]->f.pict_type !=
1430                AV_PICTURE_TYPE_B ? 3 : 0;
1431
1432         ff_mpeg_unref_picture(s, &s->new_picture);
1433         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1434             return ret;
1435
1436         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1437             // input is a shared pix, so we can't modify it -> alloc a new
1438             // one & ensure that the shared one is reusable
1439
1440             Picture *pic;
1441             int i = ff_find_unused_picture(s, 0);
1442             if (i < 0)
1443                 return i;
1444             pic = &s->picture[i];
1445
1446             pic->reference = s->reordered_input_picture[0]->reference;
1447             if (ff_alloc_picture(s, pic, 0) < 0) {
1448                 return -1;
1449             }
1450
1451             ret = av_frame_copy_props(&pic->f, &s->reordered_input_picture[0]->f);
1452             if (ret < 0)
1453                 return ret;
1454
1455             /* mark ourselves unused / release the shared picture */
1456             av_frame_unref(&s->reordered_input_picture[0]->f);
1457             s->reordered_input_picture[0]->shared = 0;
1458
1459             s->current_picture_ptr = pic;
1460         } else {
1461             // input is not a shared pix -> reuse buffer for current_pix
1462             s->current_picture_ptr = s->reordered_input_picture[0];
1463             for (i = 0; i < 4; i++) {
1464                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1465             }
1466         }
1467         ff_mpeg_unref_picture(s, &s->current_picture);
1468         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1469                                        s->current_picture_ptr)) < 0)
1470             return ret;
1471
1472         s->picture_number = s->new_picture.f.display_picture_number;
1473     } else {
1474         ff_mpeg_unref_picture(s, &s->new_picture);
1475     }
1476     return 0;
1477 }
1478
1479 static void frame_end(MpegEncContext *s)
1480 {
1481     if (s->unrestricted_mv &&
1482         s->current_picture.reference &&
1483         !s->intra_only) {
1484         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1485         int hshift = desc->log2_chroma_w;
1486         int vshift = desc->log2_chroma_h;
1487         s->dsp.draw_edges(s->current_picture.f.data[0], s->current_picture.f.linesize[0],
1488                           s->h_edge_pos, s->v_edge_pos,
1489                           EDGE_WIDTH, EDGE_WIDTH,
1490                           EDGE_TOP | EDGE_BOTTOM);
1491         s->dsp.draw_edges(s->current_picture.f.data[1], s->current_picture.f.linesize[1],
1492                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1493                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1494                           EDGE_TOP | EDGE_BOTTOM);
1495         s->dsp.draw_edges(s->current_picture.f.data[2], s->current_picture.f.linesize[2],
1496                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1497                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1498                           EDGE_TOP | EDGE_BOTTOM);
1499     }
1500
1501     emms_c();
1502
1503     s->last_pict_type                 = s->pict_type;
1504     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f.quality;
1505     if (s->pict_type!= AV_PICTURE_TYPE_B)
1506         s->last_non_b_pict_type = s->pict_type;
1507
1508     s->avctx->coded_frame = &s->current_picture_ptr->f;
1509
1510 }
1511
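/**
 * Recompute the per-coefficient DCT offsets used for noise reduction.
 * Each offset is roughly noise_reduction * dct_count / dct_error_sum[i];
 * the accumulated statistics are halved once dct_count exceeds 2^16 so
 * they keep following recent content.
 */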
1512 static void update_noise_reduction(MpegEncContext *s)
1513 {
1514     int intra, i;
1515
1516     for (intra = 0; intra < 2; intra++) {
1517         if (s->dct_count[intra] > (1 << 16)) {
1518             for (i = 0; i < 64; i++) {
1519                 s->dct_error_sum[intra][i] >>= 1;
1520             }
1521             s->dct_count[intra] >>= 1;
1522         }
1523
1524         for (i = 0; i < 64; i++) {
1525             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1526                                        s->dct_count[intra] +
1527                                        s->dct_error_sum[intra][i] / 2) /
1528                                       (s->dct_error_sum[intra][i] + 1);
1529         }
1530     }
1531 }
1532
1533 static int frame_start(MpegEncContext *s)
1534 {
1535     int ret;
1536
1537     /* mark & release old frames */
1538     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1539         s->last_picture_ptr != s->next_picture_ptr &&
1540         s->last_picture_ptr->f.buf[0]) {
1541         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1542     }
1543
1544     s->current_picture_ptr->f.pict_type = s->pict_type;
1545     s->current_picture_ptr->f.key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1546
1547     ff_mpeg_unref_picture(s, &s->current_picture);
1548     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1549                                    s->current_picture_ptr)) < 0)
1550         return ret;
1551
1552     if (s->pict_type != AV_PICTURE_TYPE_B) {
1553         s->last_picture_ptr = s->next_picture_ptr;
1554         if (!s->droppable)
1555             s->next_picture_ptr = s->current_picture_ptr;
1556     }
1557
1558     if (s->last_picture_ptr) {
1559         ff_mpeg_unref_picture(s, &s->last_picture);
1560         if (s->last_picture_ptr->f.buf[0] &&
1561             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1562                                        s->last_picture_ptr)) < 0)
1563             return ret;
1564     }
1565     if (s->next_picture_ptr) {
1566         ff_mpeg_unref_picture(s, &s->next_picture);
1567         if (s->next_picture_ptr->f.buf[0] &&
1568             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1569                                        s->next_picture_ptr)) < 0)
1570             return ret;
1571     }
1572
1573     if (s->picture_structure!= PICT_FRAME) {
1574         int i;
1575         for (i = 0; i < 4; i++) {
1576             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1577                 s->current_picture.f.data[i] +=
1578                     s->current_picture.f.linesize[i];
1579             }
1580             s->current_picture.f.linesize[i] *= 2;
1581             s->last_picture.f.linesize[i]    *= 2;
1582             s->next_picture.f.linesize[i]    *= 2;
1583         }
1584     }
1585
1586     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1587         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1588         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1589     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1590         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1591         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1592     } else {
1593         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1594         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1595     }
1596
1597     if (s->dct_error_sum) {
1598         av_assert2(s->avctx->noise_reduction && s->encoding);
1599         update_noise_reduction(s);
1600     }
1601
1602     return 0;
1603 }
1604
1605 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1606                           const AVFrame *pic_arg, int *got_packet)
1607 {
1608     MpegEncContext *s = avctx->priv_data;
1609     int i, stuffing_count, ret;
1610     int context_count = s->slice_context_count;
1611
1612     s->picture_in_gop_number++;
1613
1614     if (load_input_picture(s, pic_arg) < 0)
1615         return -1;
1616
1617     if (select_input_picture(s) < 0) {
1618         return -1;
1619     }
1620
1621     /* output? */
1622     if (s->new_picture.f.data[0]) {
1623         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1624             return ret;
1625         if (s->mb_info) {
1626             s->mb_info_ptr = av_packet_new_side_data(pkt,
1627                                  AV_PKT_DATA_H263_MB_INFO,
1628                                  s->mb_width*s->mb_height*12);
1629             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1630         }
1631
1632         for (i = 0; i < context_count; i++) {
1633             int start_y = s->thread_context[i]->start_mb_y;
1634             int   end_y = s->thread_context[i]->  end_mb_y;
1635             int h       = s->mb_height;
1636             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1637             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1638
1639             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1640         }
1641
1642         s->pict_type = s->new_picture.f.pict_type;
1643         //emms_c();
1644         ret = frame_start(s);
1645         if (ret < 0)
1646             return ret;
1647 vbv_retry:
1648         if (encode_picture(s, s->picture_number) < 0)
1649             return -1;
1650
1651         avctx->header_bits = s->header_bits;
1652         avctx->mv_bits     = s->mv_bits;
1653         avctx->misc_bits   = s->misc_bits;
1654         avctx->i_tex_bits  = s->i_tex_bits;
1655         avctx->p_tex_bits  = s->p_tex_bits;
1656         avctx->i_count     = s->i_count;
1657         // FIXME f/b_count in avctx
1658         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1659         avctx->skip_count  = s->skip_count;
1660
1661         frame_end(s);
1662
1663         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1664             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1665
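        /* VBV retry: if the frame came out larger than the rate-control
         * buffer allows, raise lambda (per macroblock too when adaptive
         * quantization is on), undo the per-frame state changes made by
         * frame_start()/encode_picture() and encode the frame again. */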
1666         if (avctx->rc_buffer_size) {
1667             RateControlContext *rcc = &s->rc_context;
1668             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1669
1670             if (put_bits_count(&s->pb) > max_size &&
1671                 s->lambda < s->avctx->lmax) {
1672                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1673                                        (s->qscale + 1) / s->qscale);
1674                 if (s->adaptive_quant) {
1675                     int i;
1676                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1677                         s->lambda_table[i] =
1678                             FFMAX(s->lambda_table[i] + 1,
1679                                   s->lambda_table[i] * (s->qscale + 1) /
1680                                   s->qscale);
1681                 }
1682                 s->mb_skipped = 0;        // done in frame_start()
1683                 // done in encode_picture() so we must undo it
1684                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1685                     if (s->flipflop_rounding          ||
1686                         s->codec_id == AV_CODEC_ID_H263P ||
1687                         s->codec_id == AV_CODEC_ID_MPEG4)
1688                         s->no_rounding ^= 1;
1689                 }
1690                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1691                     s->time_base       = s->last_time_base;
1692                     s->last_non_b_time = s->time - s->pp_time;
1693                 }
1694                 for (i = 0; i < context_count; i++) {
1695                     PutBitContext *pb = &s->thread_context[i]->pb;
1696                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1697                 }
1698                 goto vbv_retry;
1699             }
1700
1701             assert(s->avctx->rc_max_rate);
1702         }
1703
1704         if (s->flags & CODEC_FLAG_PASS1)
1705             ff_write_pass1_stats(s);
1706
1707         for (i = 0; i < 4; i++) {
1708             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1709             avctx->error[i] += s->current_picture_ptr->f.error[i];
1710         }
1711
1712         if (s->flags & CODEC_FLAG_PASS1)
1713             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1714                    avctx->i_tex_bits + avctx->p_tex_bits ==
1715                        put_bits_count(&s->pb));
1716         flush_put_bits(&s->pb);
1717         s->frame_bits  = put_bits_count(&s->pb);
1718
1719         stuffing_count = ff_vbv_update(s, s->frame_bits);
1720         s->stuffing_bits = 8*stuffing_count;
1721         if (stuffing_count) {
1722             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1723                     stuffing_count + 50) {
1724                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1725                 return -1;
1726             }
1727
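            /* Pad the frame to the size required by the VBV model:
             * MPEG-1/2 append zero bytes, MPEG-4 writes a stuffing start
             * code (00 00 01 C3) followed by 0xFF bytes. */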
1728             switch (s->codec_id) {
1729             case AV_CODEC_ID_MPEG1VIDEO:
1730             case AV_CODEC_ID_MPEG2VIDEO:
1731                 while (stuffing_count--) {
1732                     put_bits(&s->pb, 8, 0);
1733                 }
1734             break;
1735             case AV_CODEC_ID_MPEG4:
1736                 put_bits(&s->pb, 16, 0);
1737                 put_bits(&s->pb, 16, 0x1C3);
1738                 stuffing_count -= 4;
1739                 while (stuffing_count--) {
1740                     put_bits(&s->pb, 8, 0xFF);
1741                 }
1742             break;
1743             default:
1744                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1745             }
1746             flush_put_bits(&s->pb);
1747             s->frame_bits  = put_bits_count(&s->pb);
1748         }
1749
1750         /* update mpeg1/2 vbv_delay for CBR */
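        /* vbv_delay is the time (in 90 kHz ticks) the decoder has to wait
         * before decoding this picture, roughly
         *   90000 * (buffer_index + minbits - inbits) / rc_max_rate,
         * clamped up so the bits following the vbv_delay field can still
         * arrive in time, and packed into the 16-bit field that straddles
         * three header bytes (3 + 8 + 5 bits). */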
1751         if (s->avctx->rc_max_rate                          &&
1752             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1753             s->out_format == FMT_MPEG1                     &&
1754             90000LL * (avctx->rc_buffer_size - 1) <=
1755                 s->avctx->rc_max_rate * 0xFFFFLL) {
1756             int vbv_delay, min_delay;
1757             double inbits  = s->avctx->rc_max_rate *
1758                              av_q2d(s->avctx->time_base);
1759             int    minbits = s->frame_bits - 8 *
1760                              (s->vbv_delay_ptr - s->pb.buf - 1);
1761             double bits    = s->rc_context.buffer_index + minbits - inbits;
1762
1763             if (bits < 0)
1764                 av_log(s->avctx, AV_LOG_ERROR,
1765                        "Internal error, negative bits\n");
1766
1767             assert(s->repeat_first_field == 0);
1768
1769             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1770             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1771                         s->avctx->rc_max_rate;
1772
1773             vbv_delay = FFMAX(vbv_delay, min_delay);
1774
1775             av_assert0(vbv_delay < 0xFFFF);
1776
1777             s->vbv_delay_ptr[0] &= 0xF8;
1778             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1779             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1780             s->vbv_delay_ptr[2] &= 0x07;
1781             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1782             avctx->vbv_delay     = vbv_delay * 300;
1783         }
1784         s->total_bits     += s->frame_bits;
1785         avctx->frame_bits  = s->frame_bits;
1786
1787         pkt->pts = s->current_picture.f.pts;
1788         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1789             if (!s->current_picture.f.coded_picture_number)
1790                 pkt->dts = pkt->pts - s->dts_delta;
1791             else
1792                 pkt->dts = s->reordered_pts;
1793             s->reordered_pts = pkt->pts;
1794         } else
1795             pkt->dts = pkt->pts;
1796         if (s->current_picture.f.key_frame)
1797             pkt->flags |= AV_PKT_FLAG_KEY;
1798         if (s->mb_info)
1799             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1800     } else {
1801         s->frame_bits = 0;
1802     }
1803
1804     /* release non-reference frames */
1805     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1806         if (!s->picture[i].reference)
1807             ff_mpeg_unref_picture(s, &s->picture[i]);
1808     }
1809
1810     assert((s->frame_bits & 7) == 0);
1811
1812     pkt->size = s->frame_bits / 8;
1813     *got_packet = !!pkt->size;
1814     return 0;
1815 }
1816
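/**
 * Zero out a block that contains only a few scattered +-1 coefficients:
 * each such coefficient adds tab[run] to a score and the block is cleared
 * when the total stays below the threshold.  A negative threshold means
 * the DC coefficient may be eliminated as well.
 */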
1817 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1818                                                 int n, int threshold)
1819 {
1820     static const char tab[64] = {
1821         3, 2, 2, 1, 1, 1, 1, 1,
1822         1, 1, 1, 1, 1, 1, 1, 1,
1823         1, 1, 1, 1, 1, 1, 1, 1,
1824         0, 0, 0, 0, 0, 0, 0, 0,
1825         0, 0, 0, 0, 0, 0, 0, 0,
1826         0, 0, 0, 0, 0, 0, 0, 0,
1827         0, 0, 0, 0, 0, 0, 0, 0,
1828         0, 0, 0, 0, 0, 0, 0, 0
1829     };
1830     int score = 0;
1831     int run = 0;
1832     int i;
1833     int16_t *block = s->block[n];
1834     const int last_index = s->block_last_index[n];
1835     int skip_dc;
1836
1837     if (threshold < 0) {
1838         skip_dc = 0;
1839         threshold = -threshold;
1840     } else
1841         skip_dc = 1;
1842
1843     /* Is everything we could set to zero already zero? */
1844     if (last_index <= skip_dc - 1)
1845         return;
1846
1847     for (i = 0; i <= last_index; i++) {
1848         const int j = s->intra_scantable.permutated[i];
1849         const int level = FFABS(block[j]);
1850         if (level == 1) {
1851             if (skip_dc && i == 0)
1852                 continue;
1853             score += tab[run];
1854             run = 0;
1855         } else if (level > 1) {
1856             return;
1857         } else {
1858             run++;
1859         }
1860     }
1861     if (score >= threshold)
1862         return;
1863     for (i = skip_dc; i <= last_index; i++) {
1864         const int j = s->intra_scantable.permutated[i];
1865         block[j] = 0;
1866     }
1867     if (block[0])
1868         s->block_last_index[n] = 0;
1869     else
1870         s->block_last_index[n] = -1;
1871 }
1872
1873 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1874                                int last_index)
1875 {
1876     int i;
1877     const int maxlevel = s->max_qcoeff;
1878     const int minlevel = s->min_qcoeff;
1879     int overflow = 0;
1880
1881     if (s->mb_intra) {
1882         i = 1; // skip clipping of intra dc
1883     } else
1884         i = 0;
1885
1886     for (; i <= last_index; i++) {
1887         const int j = s->intra_scantable.permutated[i];
1888         int level = block[j];
1889
1890         if (level > maxlevel) {
1891             level = maxlevel;
1892             overflow++;
1893         } else if (level < minlevel) {
1894             level = minlevel;
1895             overflow++;
1896         }
1897
1898         block[j] = level;
1899     }
1900
1901     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1902         av_log(s->avctx, AV_LOG_INFO,
1903                "warning, clipping %d dct coefficients to %d..%d\n",
1904                overflow, minlevel, maxlevel);
1905 }
1906
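/**
 * Compute a perceptual weight for each pixel of an 8x8 block from the
 * spread of its 3x3 neighbourhood (clipped at the block border):
 * weight = 36 * sqrt(count * sum(v*v) - sum(v)^2) / count.
 */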
1907 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
1908 {
1909     int x, y;
1910     // FIXME optimize
1911     for (y = 0; y < 8; y++) {
1912         for (x = 0; x < 8; x++) {
1913             int x2, y2;
1914             int sum = 0;
1915             int sqr = 0;
1916             int count = 0;
1917
1918             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
1919                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
1920                     int v = ptr[x2 + y2 * stride];
1921                     sum += v;
1922                     sqr += v * v;
1923                     count++;
1924                 }
1925             }
1926             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
1927         }
1928     }
1929 }
1930
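/**
 * Encode one macroblock: fetch (or motion-compensate) the source pixels,
 * optionally switch to field (interlaced) DCT, forward-DCT and quantize
 * every block, apply optional noise shaping and single-coefficient
 * elimination, then hand the result to the codec-specific bitstream writer.
 */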
1931 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1932                                                 int motion_x, int motion_y,
1933                                                 int mb_block_height,
1934                                                 int mb_block_width,
1935                                                 int mb_block_count)
1936 {
1937     int16_t weight[12][64];
1938     int16_t orig[12][64];
1939     const int mb_x = s->mb_x;
1940     const int mb_y = s->mb_y;
1941     int i;
1942     int skip_dct[12];
1943     int dct_offset = s->linesize * 8; // default for progressive frames
1944     int uv_dct_offset = s->uvlinesize * 8;
1945     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1946     ptrdiff_t wrap_y, wrap_c;
1947
1948     for (i = 0; i < mb_block_count; i++)
1949         skip_dct[i] = s->skipdct;
1950
1951     if (s->adaptive_quant) {
1952         const int last_qp = s->qscale;
1953         const int mb_xy = mb_x + mb_y * s->mb_stride;
1954
1955         s->lambda = s->lambda_table[mb_xy];
1956         update_qscale(s);
1957
1958         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1959             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1960             s->dquant = s->qscale - last_qp;
1961
1962             if (s->out_format == FMT_H263) {
1963                 s->dquant = av_clip(s->dquant, -2, 2);
1964
1965                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1966                     if (!s->mb_intra) {
1967                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1968                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1969                                 s->dquant = 0;
1970                         }
1971                         if (s->mv_type == MV_TYPE_8X8)
1972                             s->dquant = 0;
1973                     }
1974                 }
1975             }
1976         }
1977         ff_set_qscale(s, last_qp + s->dquant);
1978     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1979         ff_set_qscale(s, s->qscale + s->dquant);
1980
1981     wrap_y = s->linesize;
1982     wrap_c = s->uvlinesize;
1983     ptr_y  = s->new_picture.f.data[0] +
1984              (mb_y * 16 * wrap_y)              + mb_x * 16;
1985     ptr_cb = s->new_picture.f.data[1] +
1986              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
1987     ptr_cr = s->new_picture.f.data[2] +
1988              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
1989
1990     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
1991         uint8_t *ebuf = s->edge_emu_buffer + 32;
1992         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
1993         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
1994         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1995                                  wrap_y, wrap_y,
1996                                  16, 16, mb_x * 16, mb_y * 16,
1997                                  s->width, s->height);
1998         ptr_y = ebuf;
1999         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
2000                                  wrap_c, wrap_c,
2001                                  mb_block_width, mb_block_height,
2002                                  mb_x * mb_block_width, mb_y * mb_block_height,
2003                                  cw, ch);
2004         ptr_cb = ebuf + 18 * wrap_y;
2005         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 16, ptr_cr,
2006                                  wrap_c, wrap_c,
2007                                  mb_block_width, mb_block_height,
2008                                  mb_x * mb_block_width, mb_y * mb_block_height,
2009                                  cw, ch);
2010         ptr_cr = ebuf + 18 * wrap_y + 16;
2011     }
2012
2013     if (s->mb_intra) {
2014         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
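            /* Choose between frame and field DCT: compare the ildct metric
             * on frame-ordered and field-ordered rows (with a bias of 400
             * favouring the progressive split) and, if the field split is
             * cheaper, re-point the block offsets and strides at the fields. */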
2015             int progressive_score, interlaced_score;
2016
2017             s->interlaced_dct = 0;
2018             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
2019                                                     NULL, wrap_y, 8) +
2020                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2021                                                     NULL, wrap_y, 8) - 400;
2022
2023             if (progressive_score > 0) {
2024                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
2025                                                        NULL, wrap_y * 2, 8) +
2026                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
2027                                                        NULL, wrap_y * 2, 8);
2028                 if (progressive_score > interlaced_score) {
2029                     s->interlaced_dct = 1;
2030
2031                     dct_offset = wrap_y;
2032                     uv_dct_offset = wrap_c;
2033                     wrap_y <<= 1;
2034                     if (s->chroma_format == CHROMA_422 ||
2035                         s->chroma_format == CHROMA_444)
2036                         wrap_c <<= 1;
2037                 }
2038             }
2039         }
2040
2041         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
2042         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
2043         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
2044         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
2045
2046         if (s->flags & CODEC_FLAG_GRAY) {
2047             skip_dct[4] = 1;
2048             skip_dct[5] = 1;
2049         } else {
2050             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2051             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2052             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2053                 s->dsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2054                 s->dsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2055             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2056                 s->dsp.get_pixels(s->block[6], ptr_cb + 8, wrap_c);
2057                 s->dsp.get_pixels(s->block[7], ptr_cr + 8, wrap_c);
2058                 s->dsp.get_pixels(s->block[8], ptr_cb + uv_dct_offset, wrap_c);
2059                 s->dsp.get_pixels(s->block[9], ptr_cr + uv_dct_offset, wrap_c);
2060                 s->dsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2061                 s->dsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2062             }
2063         }
2064     } else {
2065         op_pixels_func (*op_pix)[4];
2066         qpel_mc_func (*op_qpix)[16];
2067         uint8_t *dest_y, *dest_cb, *dest_cr;
2068
2069         dest_y  = s->dest[0];
2070         dest_cb = s->dest[1];
2071         dest_cr = s->dest[2];
2072
2073         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2074             op_pix  = s->hdsp.put_pixels_tab;
2075             op_qpix = s->dsp.put_qpel_pixels_tab;
2076         } else {
2077             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2078             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
2079         }
2080
2081         if (s->mv_dir & MV_DIR_FORWARD) {
2082             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
2083                           s->last_picture.f.data,
2084                           op_pix, op_qpix);
2085             op_pix  = s->hdsp.avg_pixels_tab;
2086             op_qpix = s->dsp.avg_qpel_pixels_tab;
2087         }
2088         if (s->mv_dir & MV_DIR_BACKWARD) {
2089             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
2090                           s->next_picture.f.data,
2091                           op_pix, op_qpix);
2092         }
2093
2094         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2095             int progressive_score, interlaced_score;
2096
2097             s->interlaced_dct = 0;
2098             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
2099                                                     ptr_y,              wrap_y,
2100                                                     8) +
2101                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
2102                                                     ptr_y + wrap_y * 8, wrap_y,
2103                                                     8) - 400;
2104
2105             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2106                 progressive_score -= 400;
2107
2108             if (progressive_score > 0) {
2109                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
2110                                                        ptr_y,
2111                                                        wrap_y * 2, 8) +
2112                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
2113                                                        ptr_y + wrap_y,
2114                                                        wrap_y * 2, 8);
2115
2116                 if (progressive_score > interlaced_score) {
2117                     s->interlaced_dct = 1;
2118
2119                     dct_offset = wrap_y;
2120                     uv_dct_offset = wrap_c;
2121                     wrap_y <<= 1;
2122                     if (s->chroma_format == CHROMA_422)
2123                         wrap_c <<= 1;
2124                 }
2125             }
2126         }
2127
2128         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2129         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2130         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2131                            dest_y + dct_offset, wrap_y);
2132         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2133                            dest_y + dct_offset + 8, wrap_y);
2134
2135         if (s->flags & CODEC_FLAG_GRAY) {
2136             skip_dct[4] = 1;
2137             skip_dct[5] = 1;
2138         } else {
2139             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2140             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2141             if (!s->chroma_y_shift) { /* 422 */
2142                 s->dsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2143                                    dest_cb + uv_dct_offset, wrap_c);
2144                 s->dsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2145                                    dest_cr + uv_dct_offset, wrap_c);
2146             }
2147         }
2148         /* pre quantization */
2149         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2150                 2 * s->qscale * s->qscale) {
2151             // FIXME optimize
2152             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2153                               wrap_y, 8) < 20 * s->qscale)
2154                 skip_dct[0] = 1;
2155             if (s->dsp.sad[1](NULL, ptr_y + 8,
2156                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2157                 skip_dct[1] = 1;
2158             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2159                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2160                 skip_dct[2] = 1;
2161             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2162                               dest_y + dct_offset + 8,
2163                               wrap_y, 8) < 20 * s->qscale)
2164                 skip_dct[3] = 1;
2165             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2166                               wrap_c, 8) < 20 * s->qscale)
2167                 skip_dct[4] = 1;
2168             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2169                               wrap_c, 8) < 20 * s->qscale)
2170                 skip_dct[5] = 1;
2171             if (!s->chroma_y_shift) { /* 422 */
2172                 if (s->dsp.sad[1](NULL, ptr_cb + uv_dct_offset,
2173                                   dest_cb + uv_dct_offset,
2174                                   wrap_c, 8) < 20 * s->qscale)
2175                     skip_dct[6] = 1;
2176                 if (s->dsp.sad[1](NULL, ptr_cr + uv_dct_offset,
2177                                   dest_cr + uv_dct_offset,
2178                                   wrap_c, 8) < 20 * s->qscale)
2179                     skip_dct[7] = 1;
2180             }
2181         }
2182     }
2183
2184     if (s->quantizer_noise_shaping) {
2185         if (!skip_dct[0])
2186             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2187         if (!skip_dct[1])
2188             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2189         if (!skip_dct[2])
2190             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2191         if (!skip_dct[3])
2192             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2193         if (!skip_dct[4])
2194             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2195         if (!skip_dct[5])
2196             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2197         if (!s->chroma_y_shift) { /* 422 */
2198             if (!skip_dct[6])
2199                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2200                                   wrap_c);
2201             if (!skip_dct[7])
2202                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2203                                   wrap_c);
2204         }
2205         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2206     }
2207
2208     /* DCT & quantize */
2209     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2210     {
2211         for (i = 0; i < mb_block_count; i++) {
2212             if (!skip_dct[i]) {
2213                 int overflow;
2214                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2215                 // FIXME we could decide to change to quantizer instead of
2216                 // clipping
2217                 // JS: I don't think that would be a good idea it could lower
2218                 //     quality instead of improve it. Just INTRADC clipping
2219                 //     deserves changes in quantizer
2220                 if (overflow)
2221                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2222             } else
2223                 s->block_last_index[i] = -1;
2224         }
2225         if (s->quantizer_noise_shaping) {
2226             for (i = 0; i < mb_block_count; i++) {
2227                 if (!skip_dct[i]) {
2228                     s->block_last_index[i] =
2229                         dct_quantize_refine(s, s->block[i], weight[i],
2230                                             orig[i], i, s->qscale);
2231                 }
2232             }
2233         }
2234
2235         if (s->luma_elim_threshold && !s->mb_intra)
2236             for (i = 0; i < 4; i++)
2237                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2238         if (s->chroma_elim_threshold && !s->mb_intra)
2239             for (i = 4; i < mb_block_count; i++)
2240                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2241
2242         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2243             for (i = 0; i < mb_block_count; i++) {
2244                 if (s->block_last_index[i] == -1)
2245                     s->coded_score[i] = INT_MAX / 256;
2246             }
2247         }
2248     }
2249
2250     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2251         s->block_last_index[4] =
2252         s->block_last_index[5] = 0;
2253         s->block[4][0] =
2254         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2255         if (!s->chroma_y_shift) { /* 422 / 444 */
2256             for (i=6; i<12; i++) {
2257                 s->block_last_index[i] = 0;
2258                 s->block[i][0] = s->block[4][0];
2259             }
2260         }
2261     }
2262
2263     // FIXME: the non-C quantize code returns an incorrect block_last_index
2264     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2265         for (i = 0; i < mb_block_count; i++) {
2266             int j;
2267             if (s->block_last_index[i] > 0) {
2268                 for (j = 63; j > 0; j--) {
2269                     if (s->block[i][s->intra_scantable.permutated[j]])
2270                         break;
2271                 }
2272                 s->block_last_index[i] = j;
2273             }
2274         }
2275     }
2276
2277     /* huffman encode */
2278     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2279     case AV_CODEC_ID_MPEG1VIDEO:
2280     case AV_CODEC_ID_MPEG2VIDEO:
2281         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2282             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2283         break;
2284     case AV_CODEC_ID_MPEG4:
2285         if (CONFIG_MPEG4_ENCODER)
2286             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2287         break;
2288     case AV_CODEC_ID_MSMPEG4V2:
2289     case AV_CODEC_ID_MSMPEG4V3:
2290     case AV_CODEC_ID_WMV1:
2291         if (CONFIG_MSMPEG4_ENCODER)
2292             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2293         break;
2294     case AV_CODEC_ID_WMV2:
2295         if (CONFIG_WMV2_ENCODER)
2296             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2297         break;
2298     case AV_CODEC_ID_H261:
2299         if (CONFIG_H261_ENCODER)
2300             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2301         break;
2302     case AV_CODEC_ID_H263:
2303     case AV_CODEC_ID_H263P:
2304     case AV_CODEC_ID_FLV1:
2305     case AV_CODEC_ID_RV10:
2306     case AV_CODEC_ID_RV20:
2307         if (CONFIG_H263_ENCODER)
2308             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2309         break;
2310     case AV_CODEC_ID_MJPEG:
2311     case AV_CODEC_ID_AMV:
2312         if (CONFIG_MJPEG_ENCODER)
2313             ff_mjpeg_encode_mb(s, s->block);
2314         break;
2315     default:
2316         av_assert1(0);
2317     }
2318 }
2319
2320 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2321 {
2322     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2323     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2324     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2325 }
2326
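/* Save (before a trial encode) and take over (after the winning trial) the
 * pieces of encoder state that encoding a single macroblock may modify, so
 * that encode_mb_hq() can try several candidate macroblock types. */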
2327 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2328     int i;
2329
2330     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2331
2332     /* mpeg1 */
2333     d->mb_skip_run= s->mb_skip_run;
2334     for(i=0; i<3; i++)
2335         d->last_dc[i] = s->last_dc[i];
2336
2337     /* statistics */
2338     d->mv_bits= s->mv_bits;
2339     d->i_tex_bits= s->i_tex_bits;
2340     d->p_tex_bits= s->p_tex_bits;
2341     d->i_count= s->i_count;
2342     d->f_count= s->f_count;
2343     d->b_count= s->b_count;
2344     d->skip_count= s->skip_count;
2345     d->misc_bits= s->misc_bits;
2346     d->last_bits= 0;
2347
2348     d->mb_skipped= 0;
2349     d->qscale= s->qscale;
2350     d->dquant= s->dquant;
2351
2352     d->esc3_level_length= s->esc3_level_length;
2353 }
2354
2355 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2356     int i;
2357
2358     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2359     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2360
2361     /* mpeg1 */
2362     d->mb_skip_run= s->mb_skip_run;
2363     for(i=0; i<3; i++)
2364         d->last_dc[i] = s->last_dc[i];
2365
2366     /* statistics */
2367     d->mv_bits= s->mv_bits;
2368     d->i_tex_bits= s->i_tex_bits;
2369     d->p_tex_bits= s->p_tex_bits;
2370     d->i_count= s->i_count;
2371     d->f_count= s->f_count;
2372     d->b_count= s->b_count;
2373     d->skip_count= s->skip_count;
2374     d->misc_bits= s->misc_bits;
2375
2376     d->mb_intra= s->mb_intra;
2377     d->mb_skipped= s->mb_skipped;
2378     d->mv_type= s->mv_type;
2379     d->mv_dir= s->mv_dir;
2380     d->pb= s->pb;
2381     if(s->data_partitioning){
2382         d->pb2= s->pb2;
2383         d->tex_pb= s->tex_pb;
2384     }
2385     d->block= s->block;
2386     for(i=0; i<8; i++)
2387         d->block_last_index[i]= s->block_last_index[i];
2388     d->interlaced_dct= s->interlaced_dct;
2389     d->qscale= s->qscale;
2390
2391     d->esc3_level_length= s->esc3_level_length;
2392 }
2393
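/**
 * Encode one macroblock with the given candidate type into a scratch
 * bitstream, score the attempt (bit count, or bits * lambda2 plus the
 * reconstruction SSE in full RD mode) and keep it if it beats *dmin.
 */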
2394 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2395                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2396                            int *dmin, int *next_block, int motion_x, int motion_y)
2397 {
2398     int score;
2399     uint8_t *dest_backup[3];
2400
2401     copy_context_before_encode(s, backup, type);
2402
2403     s->block= s->blocks[*next_block];
2404     s->pb= pb[*next_block];
2405     if(s->data_partitioning){
2406         s->pb2   = pb2   [*next_block];
2407         s->tex_pb= tex_pb[*next_block];
2408     }
2409
2410     if(*next_block){
2411         memcpy(dest_backup, s->dest, sizeof(s->dest));
2412         s->dest[0] = s->rd_scratchpad;
2413         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2414         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2415         assert(s->linesize >= 32); //FIXME
2416     }
2417
2418     encode_mb(s, motion_x, motion_y);
2419
2420     score= put_bits_count(&s->pb);
2421     if(s->data_partitioning){
2422         score+= put_bits_count(&s->pb2);
2423         score+= put_bits_count(&s->tex_pb);
2424     }
2425
2426     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2427         ff_MPV_decode_mb(s, s->block);
2428
2429         score *= s->lambda2;
2430         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2431     }
2432
2433     if(*next_block){
2434         memcpy(s->dest, dest_backup, sizeof(s->dest));
2435     }
2436
2437     if(score<*dmin){
2438         *dmin= score;
2439         *next_block^=1;
2440
2441         copy_context_after_encode(best, s, type);
2442     }
2443 }
2444
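/**
 * Sum of squared differences between two w x h blocks: the optimized dsp
 * routines handle the common 16x16 and 8x8 cases, a scalar fallback using
 * the squares table handles partial blocks at the frame border.
 */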
2445 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2446     uint32_t *sq = ff_squareTbl + 256;
2447     int acc=0;
2448     int x,y;
2449
2450     if(w==16 && h==16)
2451         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2452     else if(w==8 && h==8)
2453         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2454
2455     for(y=0; y<h; y++){
2456         for(x=0; x<w; x++){
2457             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2458         }
2459     }
2460
2461     av_assert2(acc>=0);
2462
2463     return acc;
2464 }
2465
2466 static int sse_mb(MpegEncContext *s){
2467     int w= 16;
2468     int h= 16;
2469
2470     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2471     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2472
2473     if(w==16 && h==16)
2474       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2475         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2476                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2477                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2478       }else{
2479         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2480                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2481                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2482       }
2483     else
2484         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2485                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2486                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2487 }
2488
2489 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2490     MpegEncContext *s= *(void**)arg;
2491
2492
2493     s->me.pre_pass=1;
2494     s->me.dia_size= s->avctx->pre_dia_size;
2495     s->first_slice_line=1;
2496     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2497         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2498             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2499         }
2500         s->first_slice_line=0;
2501     }
2502
2503     s->me.pre_pass=0;
2504
2505     return 0;
2506 }
2507
2508 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2509     MpegEncContext *s= *(void**)arg;
2510
2511     ff_check_alignment();
2512
2513     s->me.dia_size= s->avctx->dia_size;
2514     s->first_slice_line=1;
2515     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2516         s->mb_x=0; //for block init below
2517         ff_init_block_index(s);
2518         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2519             s->block_index[0]+=2;
2520             s->block_index[1]+=2;
2521             s->block_index[2]+=2;
2522             s->block_index[3]+=2;
2523
2524             /* compute motion vector & mb_type and store in context */
2525             if(s->pict_type==AV_PICTURE_TYPE_B)
2526                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2527             else
2528                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2529         }
2530         s->first_slice_line=0;
2531     }
2532     return 0;
2533 }
2534
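/* Spatial-complexity pass: for every 16x16 luma block, store its variance
 * and mean in the current picture and accumulate the total variance in
 * me.mb_var_sum_temp. */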
2535 static int mb_var_thread(AVCodecContext *c, void *arg){
2536     MpegEncContext *s= *(void**)arg;
2537     int mb_x, mb_y;
2538
2539     ff_check_alignment();
2540
2541     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2542         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2543             int xx = mb_x * 16;
2544             int yy = mb_y * 16;
2545             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2546             int varc;
2547             int sum = s->dsp.pix_sum(pix, s->linesize);
2548
2549             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2550
2551             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2552             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2553             s->me.mb_var_sum_temp    += varc;
2554         }
2555     }
2556     return 0;
2557 }
2558
2559 static void write_slice_end(MpegEncContext *s){
2560     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2561         if(s->partitioned_frame){
2562             ff_mpeg4_merge_partitions(s);
2563         }
2564
2565         ff_mpeg4_stuffing(&s->pb);
2566     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2567         ff_mjpeg_encode_stuffing(s);
2568     }
2569
2570     avpriv_align_put_bits(&s->pb);
2571     flush_put_bits(&s->pb);
2572
2573     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2574         s->misc_bits+= get_bits_diff(s);
2575 }
2576
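/* Append one 12-byte AV_PKT_DATA_H263_MB_INFO record for the current
 * macroblock: bit offset (le32), qscale, GOB number, macroblock address
 * (le16) and the predicted motion vector; the second MV pair is unused. */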
2577 static void write_mb_info(MpegEncContext *s)
2578 {
2579     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2580     int offset = put_bits_count(&s->pb);
2581     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2582     int gobn = s->mb_y / s->gob_index;
2583     int pred_x, pred_y;
2584     if (CONFIG_H263_ENCODER)
2585         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2586     bytestream_put_le32(&ptr, offset);
2587     bytestream_put_byte(&ptr, s->qscale);
2588     bytestream_put_byte(&ptr, gobn);
2589     bytestream_put_le16(&ptr, mba);
2590     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2591     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2592     /* 4MV not implemented */
2593     bytestream_put_byte(&ptr, 0); /* hmv2 */
2594     bytestream_put_byte(&ptr, 0); /* vmv2 */
2595 }
2596
2597 static void update_mb_info(MpegEncContext *s, int startcode)
2598 {
2599     if (!s->mb_info)
2600         return;
2601     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2602         s->mb_info_size += 12;
2603         s->prev_mb_info = s->last_mb_info;
2604     }
2605     if (startcode) {
2606         s->prev_mb_info = put_bits_count(&s->pb)/8;
2607         /* This might have incremented mb_info_size above, and we return without
2608          * actually writing any info into that slot yet. But in that case,
2609          * this will be called again after writing the start code,
2610          * actually writing the mb info. */
2611         return;
2612     }
2613
2614     s->last_mb_info = put_bits_count(&s->pb)/8;
2615     if (!s->mb_info_size)
2616         s->mb_info_size += 12;
2617     write_mb_info(s);
2618 }
2619
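/* Encode the macroblock rows of one slice context: emit resync / GOB
 * headers where the codec rules or the RTP payload size require them, and
 * for every macroblock either encode it directly or run the
 * multi-candidate rate-distortion search below. */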
2620 static int encode_thread(AVCodecContext *c, void *arg){
2621     MpegEncContext *s= *(void**)arg;
2622     int mb_x, mb_y, pdif = 0;
2623     int chr_h= 16>>s->chroma_y_shift;
2624     int i, j;
2625     MpegEncContext best_s, backup_s;
2626     uint8_t bit_buf[2][MAX_MB_BYTES];
2627     uint8_t bit_buf2[2][MAX_MB_BYTES];
2628     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2629     PutBitContext pb[2], pb2[2], tex_pb[2];
2630
2631     ff_check_alignment();
2632
2633     for(i=0; i<2; i++){
2634         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2635         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2636         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2637     }
2638
2639     s->last_bits= put_bits_count(&s->pb);
2640     s->mv_bits=0;
2641     s->misc_bits=0;
2642     s->i_tex_bits=0;
2643     s->p_tex_bits=0;
2644     s->i_count=0;
2645     s->f_count=0;
2646     s->b_count=0;
2647     s->skip_count=0;
2648
2649     for(i=0; i<3; i++){
2650         /* init last dc values */
2651         /* note: quant matrix value (8) is implied here */
2652         s->last_dc[i] = 128 << s->intra_dc_precision;
2653
2654         s->current_picture.f.error[i] = 0;
2655     }
2656     if(s->codec_id==AV_CODEC_ID_AMV){
2657         s->last_dc[0] = 128*8/13;
2658         s->last_dc[1] = 128*8/14;
2659         s->last_dc[2] = 128*8/14;
2660     }
2661     s->mb_skip_run = 0;
2662     memset(s->last_mv, 0, sizeof(s->last_mv));
2663
2664     s->last_mv_dir = 0;
2665
2666     switch(s->codec_id){
2667     case AV_CODEC_ID_H263:
2668     case AV_CODEC_ID_H263P:
2669     case AV_CODEC_ID_FLV1:
2670         if (CONFIG_H263_ENCODER)
2671             s->gob_index = ff_h263_get_gob_height(s);
2672         break;
2673     case AV_CODEC_ID_MPEG4:
2674         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2675             ff_mpeg4_init_partitions(s);
2676         break;
2677     }
2678
2679     s->resync_mb_x=0;
2680     s->resync_mb_y=0;
2681     s->first_slice_line = 1;
2682     s->ptr_lastgob = s->pb.buf;
2683     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2684         s->mb_x=0;
2685         s->mb_y= mb_y;
2686
2687         ff_set_qscale(s, s->qscale);
2688         ff_init_block_index(s);
2689
2690         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2691             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2692             int mb_type= s->mb_type[xy];
2693 //            int d;
2694             int dmin= INT_MAX;
2695             int dir;
2696
2697             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2698                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2699                 return -1;
2700             }
2701             if(s->data_partitioning){
2702                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2703                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2704                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2705                     return -1;
2706                 }
2707             }
2708
2709             s->mb_x = mb_x;
2710             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2711             ff_update_block_index(s);
2712
2713             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2714                 ff_h261_reorder_mb_index(s);
2715                 xy= s->mb_y*s->mb_stride + s->mb_x;
2716                 mb_type= s->mb_type[xy];
2717             }
2718
2719             /* write gob / video packet header  */
2720             if(s->rtp_mode){
2721                 int current_packet_size, is_gob_start;
2722
2723                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2724
2725                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2726
2727                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2728
2729                 switch(s->codec_id){
2730                 case AV_CODEC_ID_H263:
2731                 case AV_CODEC_ID_H263P:
2732                     if(!s->h263_slice_structured)
2733                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2734                     break;
2735                 case AV_CODEC_ID_MPEG2VIDEO:
2736                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2737                 case AV_CODEC_ID_MPEG1VIDEO:
2738                     if(s->mb_skip_run) is_gob_start=0;
2739                     break;
2740                 case AV_CODEC_ID_MJPEG:
2741                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2742                     break;
2743                 }
2744
2745                 if(is_gob_start){
2746                     if(s->start_mb_y != mb_y || mb_x!=0){
2747                         write_slice_end(s);
2748
2749                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2750                             ff_mpeg4_init_partitions(s);
2751                         }
2752                     }
2753
2754                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2755                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2756
2757                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2758                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2759                         int d = 100 / s->error_rate;
2760                         if(r % d == 0){
2761                             current_packet_size=0;
2762                             s->pb.buf_ptr= s->ptr_lastgob;
2763                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2764                         }
2765                     }
2766
2767                     if (s->avctx->rtp_callback){
2768                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2769                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2770                     }
2771                     update_mb_info(s, 1);
2772
2773                     switch(s->codec_id){
2774                     case AV_CODEC_ID_MPEG4:
2775                         if (CONFIG_MPEG4_ENCODER) {
2776                             ff_mpeg4_encode_video_packet_header(s);
2777                             ff_mpeg4_clean_buffers(s);
2778                         }
2779                     break;
2780                     case AV_CODEC_ID_MPEG1VIDEO:
2781                     case AV_CODEC_ID_MPEG2VIDEO:
2782                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2783                             ff_mpeg1_encode_slice_header(s);
2784                             ff_mpeg1_clean_buffers(s);
2785                         }
2786                     break;
2787                     case AV_CODEC_ID_H263:
2788                     case AV_CODEC_ID_H263P:
2789                         if (CONFIG_H263_ENCODER)
2790                             ff_h263_encode_gob_header(s, mb_y);
2791                     break;
2792                     }
2793
2794                     if(s->flags&CODEC_FLAG_PASS1){
2795                         int bits= put_bits_count(&s->pb);
2796                         s->misc_bits+= bits - s->last_bits;
2797                         s->last_bits= bits;
2798                     }
2799
2800                     s->ptr_lastgob += current_packet_size;
2801                     s->first_slice_line=1;
2802                     s->resync_mb_x=mb_x;
2803                     s->resync_mb_y=mb_y;
2804                 }
2805             }
2806
2807             if(  (s->resync_mb_x   == s->mb_x)
2808                && s->resync_mb_y+1 == s->mb_y){
2809                 s->first_slice_line=0;
2810             }
2811
2812             s->mb_skipped=0;
2813             s->dquant=0; //only for QP_RD
2814
2815             update_mb_info(s, 0);
2816
2817             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2818                 int next_block=0;
2819                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2820
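                /* Rate-distortion mode decision: every candidate MB type is trial-encoded with
                   encode_mb_hq() into alternating scratch bitstreams and the cheapest result
                   (tracked in best_s / dmin) is kept. */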
2821                 copy_context_before_encode(&backup_s, s, -1);
2822                 backup_s.pb= s->pb;
2823                 best_s.data_partitioning= s->data_partitioning;
2824                 best_s.partitioned_frame= s->partitioned_frame;
2825                 if(s->data_partitioning){
2826                     backup_s.pb2= s->pb2;
2827                     backup_s.tex_pb= s->tex_pb;
2828                 }
2829
2830                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2831                     s->mv_dir = MV_DIR_FORWARD;
2832                     s->mv_type = MV_TYPE_16X16;
2833                     s->mb_intra= 0;
2834                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2835                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2836                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2837                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2838                 }
2839                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2840                     s->mv_dir = MV_DIR_FORWARD;
2841                     s->mv_type = MV_TYPE_FIELD;
2842                     s->mb_intra= 0;
2843                     for(i=0; i<2; i++){
2844                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2845                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2846                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2847                     }
2848                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2849                                  &dmin, &next_block, 0, 0);
2850                 }
2851                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2852                     s->mv_dir = MV_DIR_FORWARD;
2853                     s->mv_type = MV_TYPE_16X16;
2854                     s->mb_intra= 0;
2855                     s->mv[0][0][0] = 0;
2856                     s->mv[0][0][1] = 0;
2857                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2858                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2859                 }
2860                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2861                     s->mv_dir = MV_DIR_FORWARD;
2862                     s->mv_type = MV_TYPE_8X8;
2863                     s->mb_intra= 0;
2864                     for(i=0; i<4; i++){
2865                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2866                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2867                     }
2868                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2869                                  &dmin, &next_block, 0, 0);
2870                 }
2871                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2872                     s->mv_dir = MV_DIR_FORWARD;
2873                     s->mv_type = MV_TYPE_16X16;
2874                     s->mb_intra= 0;
2875                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2876                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2877                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2878                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2879                 }
2880                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2881                     s->mv_dir = MV_DIR_BACKWARD;
2882                     s->mv_type = MV_TYPE_16X16;
2883                     s->mb_intra= 0;
2884                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2885                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2886                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2887                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2888                 }
2889                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2890                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2891                     s->mv_type = MV_TYPE_16X16;
2892                     s->mb_intra= 0;
2893                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2894                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2895                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2896                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2897                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2898                                  &dmin, &next_block, 0, 0);
2899                 }
2900                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2901                     s->mv_dir = MV_DIR_FORWARD;
2902                     s->mv_type = MV_TYPE_FIELD;
2903                     s->mb_intra= 0;
2904                     for(i=0; i<2; i++){
2905                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2906                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2907                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2908                     }
2909                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2910                                  &dmin, &next_block, 0, 0);
2911                 }
2912                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2913                     s->mv_dir = MV_DIR_BACKWARD;
2914                     s->mv_type = MV_TYPE_FIELD;
2915                     s->mb_intra= 0;
2916                     for(i=0; i<2; i++){
2917                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2918                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2919                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2920                     }
2921                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2922                                  &dmin, &next_block, 0, 0);
2923                 }
2924                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2925                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2926                     s->mv_type = MV_TYPE_FIELD;
2927                     s->mb_intra= 0;
2928                     for(dir=0; dir<2; dir++){
2929                         for(i=0; i<2; i++){
2930                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2931                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2932                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2933                         }
2934                     }
2935                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2936                                  &dmin, &next_block, 0, 0);
2937                 }
2938                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2939                     s->mv_dir = 0;
2940                     s->mv_type = MV_TYPE_16X16;
2941                     s->mb_intra= 1;
2942                     s->mv[0][0][0] = 0;
2943                     s->mv[0][0][1] = 0;
2944                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2945                                  &dmin, &next_block, 0, 0);
2946                     if(s->h263_pred || s->h263_aic){
2947                         if(best_s.mb_intra)
2948                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2949                         else
2950                             ff_clean_intra_table_entries(s); //old mode?
2951                     }
2952                 }
2953
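                /* QP_RD: re-try the best 16x16 mode with the qscale offsets from dquant_tab and
                   keep whichever quantizer gives the lower RD cost; DC/AC prediction state is
                   saved and restored so rejected trials do not corrupt the predictors. */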
2954                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2955                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2956                         const int last_qp= backup_s.qscale;
2957                         int qpi, qp, dc[6];
2958                         int16_t ac[6][16];
2959                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2960                         static const int dquant_tab[4]={-1,1,-2,2};
2961                         int storecoefs = s->mb_intra && s->dc_val[0];
2962
2963                         av_assert2(backup_s.dquant == 0);
2964
2965                         //FIXME intra
2966                         s->mv_dir= best_s.mv_dir;
2967                         s->mv_type = MV_TYPE_16X16;
2968                         s->mb_intra= best_s.mb_intra;
2969                         s->mv[0][0][0] = best_s.mv[0][0][0];
2970                         s->mv[0][0][1] = best_s.mv[0][0][1];
2971                         s->mv[1][0][0] = best_s.mv[1][0][0];
2972                         s->mv[1][0][1] = best_s.mv[1][0][1];
2973
2974                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2975                         for(; qpi<4; qpi++){
2976                             int dquant= dquant_tab[qpi];
2977                             qp= last_qp + dquant;
2978                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2979                                 continue;
2980                             backup_s.dquant= dquant;
2981                             if(storecoefs){
2982                                 for(i=0; i<6; i++){
2983                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2984                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2985                                 }
2986                             }
2987
2988                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2989                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2990                             if(best_s.qscale != qp){
2991                                 if(storecoefs){
2992                                     for(i=0; i<6; i++){
2993                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2994                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2995                                     }
2996                                 }
2997                             }
2998                         }
2999                     }
3000                 }
3001                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3002                     int mx= s->b_direct_mv_table[xy][0];
3003                     int my= s->b_direct_mv_table[xy][1];
3004
3005                     backup_s.dquant = 0;
3006                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3007                     s->mb_intra= 0;
3008                     ff_mpeg4_set_direct_mv(s, mx, my);
3009                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3010                                  &dmin, &next_block, mx, my);
3011                 }
3012                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3013                     backup_s.dquant = 0;
3014                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3015                     s->mb_intra= 0;
3016                     ff_mpeg4_set_direct_mv(s, 0, 0);
3017                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3018                                  &dmin, &next_block, 0, 0);
3019                 }
3020                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3021                     int coded=0;
3022                     for(i=0; i<6; i++)
3023                         coded |= s->block_last_index[i];
3024                     if(coded){
3025                         int mx,my;
3026                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3027                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3028                             mx=my=0; //FIXME find the one we actually used
3029                             ff_mpeg4_set_direct_mv(s, mx, my);
3030                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3031                             mx= s->mv[1][0][0];
3032                             my= s->mv[1][0][1];
3033                         }else{
3034                             mx= s->mv[0][0][0];
3035                             my= s->mv[0][0][1];
3036                         }
3037
3038                         s->mv_dir= best_s.mv_dir;
3039                         s->mv_type = best_s.mv_type;
3040                         s->mb_intra= 0;
3041 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3042                         s->mv[0][0][1] = best_s.mv[0][0][1];
3043                         s->mv[1][0][0] = best_s.mv[1][0][0];
3044                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3045                         backup_s.dquant= 0;
3046                         s->skipdct=1;
3047                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3048                                         &dmin, &next_block, mx, my);
3049                         s->skipdct=0;
3050                     }
3051                 }
3052
3053                 s->current_picture.qscale_table[xy] = best_s.qscale;
3054
3055                 copy_context_after_encode(s, &best_s, -1);
3056
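                /* Copy the winning trial from its scratch buffer back into the real bitstream
                   writer (and into the partition buffers when data partitioning is enabled). */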
3057                 pb_bits_count= put_bits_count(&s->pb);
3058                 flush_put_bits(&s->pb);
3059                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3060                 s->pb= backup_s.pb;
3061
3062                 if(s->data_partitioning){
3063                     pb2_bits_count= put_bits_count(&s->pb2);
3064                     flush_put_bits(&s->pb2);
3065                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3066                     s->pb2= backup_s.pb2;
3067
3068                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3069                     flush_put_bits(&s->tex_pb);
3070                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3071                     s->tex_pb= backup_s.tex_pb;
3072                 }
3073                 s->last_bits= put_bits_count(&s->pb);
3074
3075                 if (CONFIG_H263_ENCODER &&
3076                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3077                     ff_h263_update_motion_val(s);
3078
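                /* If the winning reconstruction is still in the RD scratchpad, copy it into the
                   destination picture planes. */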
3079                 if(next_block==0){ //FIXME 16 vs linesize16
3080                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3081                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3082                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3083                 }
3084
3085                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3086                     ff_MPV_decode_mb(s, s->block);
3087             } else {
3088                 int motion_x = 0, motion_y = 0;
3089                 s->mv_type=MV_TYPE_16X16;
3090                 // only one MB-Type possible
3091
3092                 switch(mb_type){
3093                 case CANDIDATE_MB_TYPE_INTRA:
3094                     s->mv_dir = 0;
3095                     s->mb_intra= 1;
3096                     motion_x= s->mv[0][0][0] = 0;
3097                     motion_y= s->mv[0][0][1] = 0;
3098                     break;
3099                 case CANDIDATE_MB_TYPE_INTER:
3100                     s->mv_dir = MV_DIR_FORWARD;
3101                     s->mb_intra= 0;
3102                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3103                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3104                     break;
3105                 case CANDIDATE_MB_TYPE_INTER_I:
3106                     s->mv_dir = MV_DIR_FORWARD;
3107                     s->mv_type = MV_TYPE_FIELD;
3108                     s->mb_intra= 0;
3109                     for(i=0; i<2; i++){
3110                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3111                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3112                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3113                     }
3114                     break;
3115                 case CANDIDATE_MB_TYPE_INTER4V:
3116                     s->mv_dir = MV_DIR_FORWARD;
3117                     s->mv_type = MV_TYPE_8X8;
3118                     s->mb_intra= 0;
3119                     for(i=0; i<4; i++){
3120                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3121                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3122                     }
3123                     break;
3124                 case CANDIDATE_MB_TYPE_DIRECT:
3125                     if (CONFIG_MPEG4_ENCODER) {
3126                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3127                         s->mb_intra= 0;
3128                         motion_x=s->b_direct_mv_table[xy][0];
3129                         motion_y=s->b_direct_mv_table[xy][1];
3130                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3131                     }
3132                     break;
3133                 case CANDIDATE_MB_TYPE_DIRECT0:
3134                     if (CONFIG_MPEG4_ENCODER) {
3135                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3136                         s->mb_intra= 0;
3137                         ff_mpeg4_set_direct_mv(s, 0, 0);
3138                     }
3139                     break;
3140                 case CANDIDATE_MB_TYPE_BIDIR:
3141                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3142                     s->mb_intra= 0;
3143                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3144                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3145                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3146                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3147                     break;
3148                 case CANDIDATE_MB_TYPE_BACKWARD:
3149                     s->mv_dir = MV_DIR_BACKWARD;
3150                     s->mb_intra= 0;
3151                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3152                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3153                     break;
3154                 case CANDIDATE_MB_TYPE_FORWARD:
3155                     s->mv_dir = MV_DIR_FORWARD;
3156                     s->mb_intra= 0;
3157                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3158                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3159                     break;
3160                 case CANDIDATE_MB_TYPE_FORWARD_I:
3161                     s->mv_dir = MV_DIR_FORWARD;
3162                     s->mv_type = MV_TYPE_FIELD;
3163                     s->mb_intra= 0;
3164                     for(i=0; i<2; i++){
3165                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3166                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3167                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3168                     }
3169                     break;
3170                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3171                     s->mv_dir = MV_DIR_BACKWARD;
3172                     s->mv_type = MV_TYPE_FIELD;
3173                     s->mb_intra= 0;
3174                     for(i=0; i<2; i++){
3175                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3176                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3177                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3178                     }
3179                     break;
3180                 case CANDIDATE_MB_TYPE_BIDIR_I:
3181                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3182                     s->mv_type = MV_TYPE_FIELD;
3183                     s->mb_intra= 0;
3184                     for(dir=0; dir<2; dir++){
3185                         for(i=0; i<2; i++){
3186                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3187                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3188                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3189                         }
3190                     }
3191                     break;
3192                 default:
3193                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3194                 }
3195
3196                 encode_mb(s, motion_x, motion_y);
3197
3198                 // RAL: Update last macroblock type
3199                 s->last_mv_dir = s->mv_dir;
3200
3201                 if (CONFIG_H263_ENCODER &&
3202                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3203                     ff_h263_update_motion_val(s);
3204
3205                 ff_MPV_decode_mb(s, s->block);
3206             }
3207
3208             /* Clear the MV table entry for intra MBs in I/P/S frames so that direct mode in B-frames sees zero vectors */
3209             if(s->mb_intra /* && I,P,S_TYPE */){
3210                 s->p_mv_table[xy][0]=0;
3211                 s->p_mv_table[xy][1]=0;
3212             }
3213
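            /* With CODEC_FLAG_PSNR, accumulate the per-plane SSE of this macroblock
               (clipped at the right/bottom picture border) for the final PSNR report. */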
3214             if(s->flags&CODEC_FLAG_PSNR){
3215                 int w= 16;
3216                 int h= 16;
3217
3218                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3219                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3220
3221                 s->current_picture.f.error[0] += sse(
3222                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3223                     s->dest[0], w, h, s->linesize);
3224                 s->current_picture.f.error[1] += sse(
3225                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3226                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3227                 s->current_picture.f.error[2] += sse(
3228                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3229                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3230             }
3231             if(s->loop_filter){
3232                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3233                     ff_h263_loop_filter(s);
3234             }
3235             av_dlog(s->avctx, "MB %d %d bits\n",
3236                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3237         }
3238     }
3239
3240     // Not pretty, but the extension header must be written before the final flush, so it has to be here.
3241     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3242         ff_msmpeg4_encode_ext_header(s);
3243
3244     write_slice_end(s);
3245
3246     /* Send the last GOB if RTP */
3247     if (s->avctx->rtp_callback) {
3248         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3249         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3250         /* Call the RTP callback to send the last GOB */
3251         emms_c();
3252         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3253     }
3254
3255     return 0;
3256 }
3257
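/* Slice threads collect statistics in private contexts; MERGE() adds a field into the main
   context and zeroes the source so a merge can safely be repeated. */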
3258 #define MERGE(field) dst->field += src->field; src->field=0
3259 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3260     MERGE(me.scene_change_score);
3261     MERGE(me.mc_mb_var_sum_temp);
3262     MERGE(me.mb_var_sum_temp);
3263 }
3264
3265 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3266     int i;
3267
3268     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3269     MERGE(dct_count[1]);
3270     MERGE(mv_bits);
3271     MERGE(i_tex_bits);
3272     MERGE(p_tex_bits);
3273     MERGE(i_count);
3274     MERGE(f_count);
3275     MERGE(b_count);
3276     MERGE(skip_count);
3277     MERGE(misc_bits);
3278     MERGE(er.error_count);
3279     MERGE(padding_bug_score);
3280     MERGE(current_picture.f.error[0]);
3281     MERGE(current_picture.f.error[1]);
3282     MERGE(current_picture.f.error[2]);
3283
3284     if(dst->avctx->noise_reduction){
3285         for(i=0; i<64; i++){
3286             MERGE(dct_error_sum[0][i]);
3287             MERGE(dct_error_sum[1][i]);
3288         }
3289     }
3290
3291     assert(put_bits_count(&src->pb) % 8 ==0);
3292     assert(put_bits_count(&dst->pb) % 8 ==0);
3293     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3294     flush_put_bits(&dst->pb);
3295 }
3296
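/* Pick the frame quantizer: either the forced next_lambda or the rate controller's estimate;
   with adaptive quantization the per-MB qscale table is cleaned up for codecs whose dquant
   syntax only allows small steps. */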
3297 static int estimate_qp(MpegEncContext *s, int dry_run){
3298     if (s->next_lambda){
3299         s->current_picture_ptr->f.quality =
3300         s->current_picture.f.quality = s->next_lambda;
3301         if(!dry_run) s->next_lambda= 0;
3302     } else if (!s->fixed_qscale) {
3303         s->current_picture_ptr->f.quality =
3304         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3305         if (s->current_picture.f.quality < 0)
3306             return -1;
3307     }
3308
3309     if(s->adaptive_quant){
3310         switch(s->codec_id){
3311         case AV_CODEC_ID_MPEG4:
3312             if (CONFIG_MPEG4_ENCODER)
3313                 ff_clean_mpeg4_qscales(s);
3314             break;
3315         case AV_CODEC_ID_H263:
3316         case AV_CODEC_ID_H263P:
3317         case AV_CODEC_ID_FLV1:
3318             if (CONFIG_H263_ENCODER)
3319                 ff_clean_h263_qscales(s);
3320             break;
3321         default:
3322             ff_init_qscale_tab(s);
3323         }
3324
3325         s->lambda= s->lambda_table[0];
3326         //FIXME broken
3327     }else
3328         s->lambda = s->current_picture.f.quality;
3329     update_qscale(s);
3330     return 0;
3331 }
3332
3333 /* must be called before writing the header */
3334 static void set_frame_distances(MpegEncContext * s){
3335     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3336     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3337
3338     if(s->pict_type==AV_PICTURE_TYPE_B){
3339         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3340         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3341     }else{
3342         s->pp_time= s->time - s->last_non_b_time;
3343         s->last_non_b_time= s->time;
3344         assert(s->picture_number==0 || s->pp_time > 0);
3345     }
3346 }
3347
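/* Encode one picture: run motion estimation (possibly split across slice threads), choose
   f_code/b_code and the frame quantizer, write the picture header, then encode the macroblocks
   via encode_thread() and merge the per-thread statistics. */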
3348 static int encode_picture(MpegEncContext *s, int picture_number)
3349 {
3350     int i, ret;
3351     int bits;
3352     int context_count = s->slice_context_count;
3353
3354     s->picture_number = picture_number;
3355
3356     /* Reset the average MB variance */
3357     s->me.mb_var_sum_temp    =
3358     s->me.mc_mb_var_sum_temp = 0;
3359
3360     /* we need to initialize some time vars before we can encode b-frames */
3361     // RAL: Condition added for MPEG1VIDEO
3362     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3363         set_frame_distances(s);
3364     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3365         ff_set_mpeg4_time(s);
3366
3367     s->me.scene_change_score=0;
3368
3369 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3370
3371     if(s->pict_type==AV_PICTURE_TYPE_I){
3372         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3373         else                        s->no_rounding=0;
3374     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3375         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3376             s->no_rounding ^= 1;
3377     }
3378
3379     if(s->flags & CODEC_FLAG_PASS2){
3380         if (estimate_qp(s,1) < 0)
3381             return -1;
3382         ff_get_2pass_fcode(s);
3383     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3384         if(s->pict_type==AV_PICTURE_TYPE_B)
3385             s->lambda= s->last_lambda_for[s->pict_type];
3386         else
3387             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3388         update_qscale(s);
3389     }
3390
3391     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3392         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3393         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3394         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3395         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3396     }
3397
3398     s->mb_intra=0; //for the rate distortion & bit compare functions
3399     for(i=1; i<context_count; i++){
3400         ret = ff_update_duplicate_context(s->thread_context[i], s);
3401         if (ret < 0)
3402             return ret;
3403     }
3404
3405     if(ff_init_me(s)<0)
3406         return -1;
3407
3408     /* Estimate motion for every MB */
3409     if(s->pict_type != AV_PICTURE_TYPE_I){
3410         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3411         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3412         if (s->pict_type != AV_PICTURE_TYPE_B) {
3413             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3414                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3415             }
3416         }
3417
3418         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3419     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3420         /* I-Frame */
3421         for(i=0; i<s->mb_stride*s->mb_height; i++)
3422             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3423
3424         if(!s->fixed_qscale){
3425             /* finding spatial complexity for I-frame rate control */
3426             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3427         }
3428     }
3429     for(i=1; i<context_count; i++){
3430         merge_context_after_me(s, s->thread_context[i]);
3431     }
3432     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3433     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3434     emms_c();
3435
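    /* Scene-change handling: a high scene-change score from motion estimation promotes this
       P-frame to an I-frame and marks every MB as intra. */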
3436     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3437         s->pict_type= AV_PICTURE_TYPE_I;
3438         for(i=0; i<s->mb_stride*s->mb_height; i++)
3439             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3440         if(s->msmpeg4_version >= 3)
3441             s->no_rounding=1;
3442         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3443                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3444     }
3445
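    /* Choose f_code/b_code from the motion-vector statistics and clamp vectors outside the
       representable range with ff_fix_long_mvs(). */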
3446     if(!s->umvplus){
3447         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3448             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3449
3450             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3451                 int a,b;
3452                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3453                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3454                 s->f_code= FFMAX3(s->f_code, a, b);
3455             }
3456
3457             ff_fix_long_p_mvs(s);
3458             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3459             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3460                 int j;
3461                 for(i=0; i<2; i++){
3462                     for(j=0; j<2; j++)
3463                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3464                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3465                 }
3466             }
3467         }
3468
3469         if(s->pict_type==AV_PICTURE_TYPE_B){
3470             int a, b;
3471
3472             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3473             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3474             s->f_code = FFMAX(a, b);
3475
3476             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3477             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3478             s->b_code = FFMAX(a, b);
3479
3480             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3481             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3482             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3483             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3484             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3485                 int dir, j;
3486                 for(dir=0; dir<2; dir++){
3487                     for(i=0; i<2; i++){
3488                         for(j=0; j<2; j++){
3489                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3490                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3491                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3492                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3493                         }
3494                     }
3495                 }
3496             }
3497         }
3498     }
3499
3500     if (estimate_qp(s, 0) < 0)
3501         return -1;
3502
3503     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3504         s->qscale= 3; //reduce clipping problems
3505
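    /* (M)JPEG and AMV have no per-MB quantizer, so qscale is folded into the quantization
       matrices below and the nominal qscale is then fixed at 8. */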
3506     if (s->out_format == FMT_MJPEG) {
3507         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3508         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3509
3510         if (s->avctx->intra_matrix) {
3511             chroma_matrix =
3512             luma_matrix = s->avctx->intra_matrix;
3513         }
3514         if (s->avctx->chroma_intra_matrix)
3515             chroma_matrix = s->avctx->chroma_intra_matrix;
3516
3517         /* for mjpeg, we do include qscale in the matrix */
3518         for(i=1;i<64;i++){
3519             int j= s->dsp.idct_permutation[i];
3520
3521             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3522             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3523         }
3524         s->y_dc_scale_table=
3525         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3526         s->chroma_intra_matrix[0] =
3527         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3528         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3529                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3530         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3531                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3532         s->qscale= 8;
3533     }
3534     if(s->codec_id == AV_CODEC_ID_AMV){
3535         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3536         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3537         for(i=1;i<64;i++){
3538             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3539
3540             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3541             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3542         }
3543         s->y_dc_scale_table= y;
3544         s->c_dc_scale_table= c;
3545         s->intra_matrix[0] = 13;
3546         s->chroma_intra_matrix[0] = 14;
3547         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3548                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3549         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3550                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3551         s->qscale= 8;
3552     }
3553
3554     //FIXME var duplication
3555     s->current_picture_ptr->f.key_frame =
3556     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3557     s->current_picture_ptr->f.pict_type =
3558     s->current_picture.f.pict_type = s->pict_type;
3559
3560     if (s->current_picture.f.key_frame)
3561         s->picture_in_gop_number=0;
3562
3563     s->mb_x = s->mb_y = 0;
3564     s->last_bits= put_bits_count(&s->pb);
3565     switch(s->out_format) {
3566     case FMT_MJPEG:
3567         if (CONFIG_MJPEG_ENCODER)
3568             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3569                                            s->intra_matrix, s->chroma_intra_matrix);
3570         break;
3571     case FMT_H261:
3572         if (CONFIG_H261_ENCODER)
3573             ff_h261_encode_picture_header(s, picture_number);
3574         break;
3575     case FMT_H263:
3576         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3577             ff_wmv2_encode_picture_header(s, picture_number);
3578         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3579             ff_msmpeg4_encode_picture_header(s, picture_number);
3580         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3581             ff_mpeg4_encode_picture_header(s, picture_number);
3582         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3583             ff_rv10_encode_picture_header(s, picture_number);
3584         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3585             ff_rv20_encode_picture_header(s, picture_number);
3586         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3587             ff_flv_encode_picture_header(s, picture_number);
3588         else if (CONFIG_H263_ENCODER)
3589             ff_h263_encode_picture_header(s, picture_number);
3590         break;
3591     case FMT_MPEG1:
3592         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3593             ff_mpeg1_encode_picture_header(s, picture_number);
3594         break;
3595     default:
3596         av_assert0(0);
3597     }
3598     bits= put_bits_count(&s->pb);
3599     s->header_bits= bits - s->last_bits;
3600
3601     for(i=1; i<context_count; i++){
3602         update_duplicate_context_after_me(s->thread_context[i], s);
3603     }
3604     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3605     for(i=1; i<context_count; i++){
3606         merge_context_after_encode(s, s->thread_context[i]);
3607     }
3608     emms_c();
3609     return 0;
3610 }
3611
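/* DCT-domain noise reduction: accumulate per-coefficient magnitude statistics and pull every
   coefficient towards zero by the running offset, clamping at zero so the sign never flips. */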
3612 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3613     const int intra= s->mb_intra;
3614     int i;
3615
3616     s->dct_count[intra]++;
3617
3618     for(i=0; i<64; i++){
3619         int level= block[i];
3620
3621         if(level){
3622             if(level>0){
3623                 s->dct_error_sum[intra][i] += level;
3624                 level -= s->dct_offset[intra][i];
3625                 if(level<0) level=0;
3626             }else{
3627                 s->dct_error_sum[intra][i] -= level;
3628                 level += s->dct_offset[intra][i];
3629                 if(level>0) level=0;
3630             }
3631             block[i]= level;
3632         }
3633     }
3634 }
3635
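/* Trellis quantization: dynamic programming over the scan positions. Up to two candidate levels
   are considered per coefficient and a survivor list of possible run starts is kept; the path
   minimizing distortion + lambda * rate (VLC length) wins. */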
3636 static int dct_quantize_trellis_c(MpegEncContext *s,
3637                                   int16_t *block, int n,
3638                                   int qscale, int *overflow){
3639     const int *qmat;
3640     const uint8_t *scantable= s->intra_scantable.scantable;
3641     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3642     int max=0;
3643     unsigned int threshold1, threshold2;
3644     int bias=0;
3645     int run_tab[65];
3646     int level_tab[65];
3647     int score_tab[65];
3648     int survivor[65];
3649     int survivor_count;
3650     int last_run=0;
3651     int last_level=0;
3652     int last_score= 0;
3653     int last_i;
3654     int coeff[2][64];
3655     int coeff_count[64];
3656     int qmul, qadd, start_i, last_non_zero, i, dc;
3657     const int esc_length= s->ac_esc_length;
3658     uint8_t * length;
3659     uint8_t * last_length;
3660     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3661
3662     s->dsp.fdct (block);
3663
3664     if(s->dct_error_sum)
3665         s->denoise_dct(s, block);
3666     qmul= qscale*16;
3667     qadd= ((qscale-1)|1)*8;
3668
3669     if (s->mb_intra) {
3670         int q;
3671         if (!s->h263_aic) {
3672             if (n < 4)
3673                 q = s->y_dc_scale;
3674             else
3675                 q = s->c_dc_scale;
3676             q = q << 3;
3677         } else{
3678             /* For AIC we skip quant/dequant of INTRADC */
3679             q = 1 << 3;
3680             qadd=0;
3681         }
3682
3683         /* note: block[0] is assumed to be positive */
3684         block[0] = (block[0] + (q >> 1)) / q;
3685         start_i = 1;
3686         last_non_zero = 0;
3687         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3688         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3689             bias= 1<<(QMAT_SHIFT-1);
3690         length     = s->intra_ac_vlc_length;
3691         last_length= s->intra_ac_vlc_last_length;
3692     } else {
3693         start_i = 0;
3694         last_non_zero = -1;
3695         qmat = s->q_inter_matrix[qscale];
3696         length     = s->inter_ac_vlc_length;
3697         last_length= s->inter_ac_vlc_last_length;
3698     }
3699     last_i= start_i;
3700
3701     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3702     threshold2= (threshold1<<1);
3703
3704     for(i=63; i>=start_i; i--) {
3705         const int j = scantable[i];
3706         int level = block[j] * qmat[j];
3707
3708         if(((unsigned)(level+threshold1))>threshold2){
3709             last_non_zero = i;
3710             break;
3711         }
3712     }
3713
3714     for(i=start_i; i<=last_non_zero; i++) {
3715         const int j = scantable[i];
3716         int level = block[j] * qmat[j];
3717
3718 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3719 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3720         if(((unsigned)(level+threshold1))>threshold2){
3721             if(level>0){
3722                 level= (bias + level)>>QMAT_SHIFT;
3723                 coeff[0][i]= level;
3724                 coeff[1][i]= level-1;
3725 //                coeff[2][k]= level-2;
3726             }else{
3727                 level= (bias - level)>>QMAT_SHIFT;
3728                 coeff[0][i]= -level;
3729                 coeff[1][i]= -level+1;
3730 //                coeff[2][k]= -level+2;
3731             }
3732             coeff_count[i]= FFMIN(level, 2);
3733             av_assert2(coeff_count[i]);
3734             max |=level;
3735         }else{
3736             coeff[0][i]= (level>>31)|1;
3737             coeff_count[i]= 1;
3738         }
3739     }
3740
3741     *overflow= s->max_qcoeff < max; //overflow might have happened
3742
3743     if(last_non_zero < start_i){
3744         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3745         return last_non_zero;
3746     }
3747
3748     score_tab[start_i]= 0;
3749     survivor[0]= start_i;
3750     survivor_count= 1;
3751
3752     for(i=start_i; i<=last_non_zero; i++){
3753         int level_index, j, zero_distortion;
3754         int dct_coeff= FFABS(block[ scantable[i] ]);
3755         int best_score=256*256*256*120;
3756
3757         if (s->dsp.fdct == ff_fdct_ifast)
3758             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3759         zero_distortion= dct_coeff*dct_coeff;
3760
3761         for(level_index=0; level_index < coeff_count[i]; level_index++){
3762             int distortion;
3763             int level= coeff[level_index][i];
3764             const int alevel= FFABS(level);
3765             int unquant_coeff;
3766
3767             av_assert2(level);
3768
3769             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3770                 unquant_coeff= alevel*qmul + qadd;
3771             }else{ //MPEG1
3772                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3773                 if(s->mb_intra){
3774                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3775                         unquant_coeff =   (unquant_coeff - 1) | 1;
3776                 }else{
3777                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3778                         unquant_coeff =   (unquant_coeff - 1) | 1;
3779                 }
3780                 unquant_coeff<<= 3;
3781             }
3782
3783             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3784             level+=64;
3785             if((level&(~127)) == 0){
3786                 for(j=survivor_count-1; j>=0; j--){
3787                     int run= i - survivor[j];
3788                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3789                     score += score_tab[i-run];
3790
3791                     if(score < best_score){
3792                         best_score= score;
3793                         run_tab[i+1]= run;
3794                         level_tab[i+1]= level-64;
3795                     }
3796                 }
3797
3798                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3799                     for(j=survivor_count-1; j>=0; j--){
3800                         int run= i - survivor[j];
3801                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3802                         score += score_tab[i-run];
3803                         if(score < last_score){
3804                             last_score= score;
3805                             last_run= run;
3806                             last_level= level-64;
3807                             last_i= i+1;
3808                         }
3809                     }
3810                 }
3811             }else{
3812                 distortion += esc_length*lambda;
3813                 for(j=survivor_count-1; j>=0; j--){
3814                     int run= i - survivor[j];
3815                     int score= distortion + score_tab[i-run];
3816
3817                     if(score < best_score){
3818                         best_score= score;
3819                         run_tab[i+1]= run;
3820                         level_tab[i+1]= level-64;
3821                     }
3822                 }
3823
3824                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3825                   for(j=survivor_count-1; j>=0; j--){
3826                         int run= i - survivor[j];
3827                         int score= distortion + score_tab[i-run];
3828                         if(score < last_score){
3829                             last_score= score;
3830                             last_run= run;
3831                             last_level= level-64;
3832                             last_i= i+1;
3833                         }
3834                     }
3835                 }
3836             }
3837         }
3838
3839         score_tab[i+1]= best_score;
3840
3841         //Note: MPEG-4 has a VLC code which is 1 bit shorter than another one with a shorter run and the same level
3842         if(last_non_zero <= 27){
3843             for(; survivor_count; survivor_count--){
3844                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3845                     break;
3846             }
3847         }else{
3848             for(; survivor_count; survivor_count--){
3849                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3850                     break;
3851             }
3852         }
3853
3854         survivor[ survivor_count++ ]= i+1;
3855     }
3856
3857     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3858         last_score= 256*256*256*120;
3859         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3860             int score= score_tab[i];
3861             if(i) score += lambda*2; //FIXME more exact?
3862
3863             if(score < last_score){
3864                 last_score= score;
3865                 last_i= i;
3866                 last_level= level_tab[i];
3867                 last_run= run_tab[i];
3868             }
3869         }
3870     }
3871
3872     s->coded_score[n] = last_score;
3873
3874     dc= FFABS(block[0]);
3875     last_non_zero= last_i - 1;
3876     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3877
3878     if(last_non_zero < start_i)
3879         return last_non_zero;
3880
3881     if(last_non_zero == 0 && start_i == 0){
3882         int best_level= 0;
3883         int best_score= dc * dc;
3884
3885         for(i=0; i<coeff_count[0]; i++){
3886             int level= coeff[i][0];
3887             int alevel= FFABS(level);
3888             int unquant_coeff, score, distortion;
3889
3890             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3891                     unquant_coeff= (alevel*qmul + qadd)>>3;
3892             }else{ //MPEG1
3893                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3894                     unquant_coeff =   (unquant_coeff - 1) | 1;
3895             }
3896             unquant_coeff = (unquant_coeff + 4) >> 3;
3897             unquant_coeff<<= 3 + 3;
3898
3899             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3900             level+=64;
3901             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3902             else                    score= distortion + esc_length*lambda;
3903
3904             if(score < best_score){
3905                 best_score= score;
3906                 best_level= level - 64;
3907             }
3908         }
3909         block[0]= best_level;
3910         s->coded_score[n] = best_score - dc*dc;
3911         if(best_level == 0) return -1;
3912         else                return last_non_zero;
3913     }
3914
3915     i= last_i;
3916     av_assert2(last_level);
3917
3918     block[ perm_scantable[last_non_zero] ]= last_level;
3919     i -= last_run + 1;
3920
3921     for(; i>start_i; i -= run_tab[i] + 1){
3922         block[ perm_scantable[i-1] ]= level_tab[i];
3923     }
3924
3925     return last_non_zero;
3926 }
3927
3928 //#define REFINE_STATS 1
3929 static int16_t basis[64][64];
3930
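/* Precompute the 64 8x8 DCT basis patterns (stored in IDCT permutation order); they are used by
   dct_quantize_refine() to evaluate coefficient changes directly in the spatial domain. */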
3931 static void build_basis(uint8_t *perm){
3932     int i, j, x, y;
3933     emms_c();
3934     for(i=0; i<8; i++){
3935         for(j=0; j<8; j++){
3936             for(y=0; y<8; y++){
3937                 for(x=0; x<8; x++){
3938                     double s= 0.25*(1<<BASIS_SHIFT);
3939                     int index= 8*i + j;
3940                     int perm_index= perm[index];
3941                     if(i==0) s*= sqrt(0.5);
3942                     if(j==0) s*= sqrt(0.5);
3943                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3944                 }
3945             }
3946         }
3947     }
3948 }
3949
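/* Quantizer refinement (quantizer_noise_shaping): starting from the existing quantization,
   greedily try +/-1 changes on individual coefficients and keep any change that lowers the
   weighted spatial-domain error plus lambda times the rate change. */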
3950 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3951                         int16_t *block, int16_t *weight, int16_t *orig,
3952                         int n, int qscale){
3953     int16_t rem[64];
3954     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3955     const uint8_t *scantable= s->intra_scantable.scantable;
3956     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3957 //    unsigned int threshold1, threshold2;
3958 //    int bias=0;
3959     int run_tab[65];
3960     int prev_run=0;
3961     int prev_level=0;
3962     int qmul, qadd, start_i, last_non_zero, i, dc;
3963     uint8_t * length;
3964     uint8_t * last_length;
3965     int lambda;
3966     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3967 #ifdef REFINE_STATS
3968 static int count=0;
3969 static int after_last=0;
3970 static int to_zero=0;
3971 static int from_zero=0;
3972 static int raise=0;
3973 static int lower=0;
3974 static int messed_sign=0;
3975 #endif
3976
3977     if(basis[0][0] == 0)
3978         build_basis(s->dsp.idct_permutation);
3979
3980     qmul= qscale*2;
3981     qadd= (qscale-1)|1;
3982     if (s->mb_intra) {
3983         if (!s->h263_aic) {
3984             if (n < 4)
3985                 q = s->y_dc_scale;
3986             else
3987                 q = s->c_dc_scale;
3988         } else{
3989             /* For AIC we skip quant/dequant of INTRADC */
3990             q = 1;
3991             qadd=0;
3992         }
3993         q <<= RECON_SHIFT-3;
3994         /* note: block[0] is assumed to be positive */
3995         dc= block[0]*q;
3996 //        block[0] = (block[0] + (q >> 1)) / q;
3997         start_i = 1;
3998 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3999 //            bias= 1<<(QMAT_SHIFT-1);
4000         length     = s->intra_ac_vlc_length;
4001         last_length= s->intra_ac_vlc_last_length;
4002     } else {
4003         dc= 0;
4004         start_i = 0;
4005         length     = s->inter_ac_vlc_length;
4006         last_length= s->inter_ac_vlc_last_length;
4007     }
4008     last_non_zero = s->block_last_index[n];
4009
4010 #ifdef REFINE_STATS
4011 {START_TIMER
4012 #endif
4013     dc += (1<<(RECON_SHIFT-1));
4014     for(i=0; i<64; i++){
4015         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
4016     }
4017 #ifdef REFINE_STATS
4018 STOP_TIMER("memset rem[]")}
4019 #endif
4020     sum=0;
4021     for(i=0; i<64; i++){
4022         int one= 36;
4023         int qns=4;
4024         int w;
4025
4026         w= FFABS(weight[i]) + qns*one;
4027         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4028
4029         weight[i] = w;
4030 //        w=weight[i] = (63*qns + (w/2)) / w;
4031
4032         av_assert2(w>0);
4033         av_assert2(w<(1<<6));
4034         sum += w*w;
4035     }
4036     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4037 #ifdef REFINE_STATS
4038 {START_TIMER
4039 #endif
4040     run=0;
4041     rle_index=0;
4042     for(i=start_i; i<=last_non_zero; i++){
4043         int j= perm_scantable[i];
4044         const int level= block[j];
4045         int coeff;
4046
4047         if(level){
4048             if(level<0) coeff= qmul*level - qadd;
4049             else        coeff= qmul*level + qadd;
4050             run_tab[rle_index++]=run;
4051             run=0;
4052
4053             s->dsp.add_8x8basis(rem, basis[j], coeff);
4054         }else{
4055             run++;
4056         }
4057     }
4058 #ifdef REFINE_STATS
4059 if(last_non_zero>0){
4060 STOP_TIMER("init rem[]")
4061 }
4062 }
4063
4064 {START_TIMER
4065 #endif
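    /* Greedy search: each iteration picks the single +-1 coefficient change
       with the best rate/distortion score and applies it; the loop stops as
       soon as no change improves on the current score. */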
4066     for(;;){
4067         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
4068         int best_coeff=0;
4069         int best_change=0;
4070         int run2, best_unquant_change=0, analyze_gradient;
4071 #ifdef REFINE_STATS
4072 {START_TIMER
4073 #endif
4074         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4075
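        /* DCT of the weighted residual: for a coefficient that is currently
           zero, a new level is only worth trying if its sign opposes the
           corresponding gradient component d1[] (checked further below). */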
4076         if(analyze_gradient){
4077 #ifdef REFINE_STATS
4078 {START_TIMER
4079 #endif
4080             for(i=0; i<64; i++){
4081                 int w= weight[i];
4082
4083                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4084             }
4085 #ifdef REFINE_STATS
4086 STOP_TIMER("rem*w*w")}
4087 {START_TIMER
4088 #endif
4089             s->dsp.fdct(d1);
4090 #ifdef REFINE_STATS
4091 STOP_TIMER("dct")}
4092 #endif
4093         }
4094
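        /* The intra DC coefficient is treated separately: it is scaled with
           the DC quantizer q instead of qmul/qadd and carries no VLC rate
           term in the score. */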
4095         if(start_i){
4096             const int level= block[0];
4097             int change, old_coeff;
4098
4099             av_assert2(s->mb_intra);
4100
4101             old_coeff= q*level;
4102
4103             for(change=-1; change<=1; change+=2){
4104                 int new_level= level + change;
4105                 int score, new_coeff;
4106
4107                 new_coeff= q*new_level;
4108                 if(new_coeff >= 2048 || new_coeff < 0)
4109                     continue;
4110
4111                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
4112                 if(score<best_score){
4113                     best_score= score;
4114                     best_coeff= 0;
4115                     best_change= change;
4116                     best_unquant_change= new_coeff - old_coeff;
4117                 }
4118             }
4119         }
4120
4121         run=0;
4122         rle_index=0;
4123         run2= run_tab[rle_index++];
4124         prev_level=0;
4125         prev_run=0;
4126
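        /* Try raising and lowering every AC coefficient by one step; the rate
           part of the score is the change in VLC length, accounting for how
           the surrounding zero runs would be merged or split. */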
4127         for(i=start_i; i<64; i++){
4128             int j= perm_scantable[i];
4129             const int level= block[j];
4130             int change, old_coeff;
4131
4132             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4133                 break;
4134
4135             if(level){
4136                 if(level<0) old_coeff= qmul*level - qadd;
4137                 else        old_coeff= qmul*level + qadd;
4138                 run2= run_tab[rle_index++]; //FIXME: may read one entry past the last run
4139             }else{
4140                 old_coeff=0;
4141                 run2--;
4142                 av_assert2(run2>=0 || i >= last_non_zero );
4143             }
4144
4145             for(change=-1; change<=1; change+=2){
4146                 int new_level= level + change;
4147                 int score, new_coeff, unquant_change;
4148
4149                 score=0;
4150                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4151                    continue;
4152
4153                 if(new_level){
4154                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4155                     else            new_coeff= qmul*new_level + qadd;
4156                     if(new_coeff >= 2048 || new_coeff <= -2048)
4157                         continue;
4158                     //FIXME check for overflow
4159
4160                     if(level){
4161                         if(level < 63 && level > -63){
4162                             if(i < last_non_zero)
4163                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4164                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4165                             else
4166                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4167                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4168                         }
4169                     }else{
4170                         av_assert2(FFABS(new_level)==1);
4171
4172                         if(analyze_gradient){
4173                             int g= d1[ scantable[i] ];
4174                             if(g && (g^new_level) >= 0)
4175                                 continue;
4176                         }
4177
4178                         if(i < last_non_zero){
4179                             int next_i= i + run2 + 1;
4180                             int next_level= block[ perm_scantable[next_i] ] + 64;
4181
4182                             if(next_level&(~127))
4183                                 next_level= 0;
4184
4185                             if(next_i < last_non_zero)
4186                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4187                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4188                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4189                             else
4190                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4191                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4192                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4193                         }else{
4194                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4195                             if(prev_level){
4196                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4197                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4198                             }
4199                         }
4200                     }
4201                 }else{
4202                     new_coeff=0;
4203                     av_assert2(FFABS(level)==1);
4204
4205                     if(i < last_non_zero){
4206                         int next_i= i + run2 + 1;
4207                         int next_level= block[ perm_scantable[next_i] ] + 64;
4208
4209                         if(next_level&(~127))
4210                             next_level= 0;
4211
4212                         if(next_i < last_non_zero)
4213                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4214                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4215                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4216                         else
4217                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4218                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4219                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4220                     }else{
4221                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4222                         if(prev_level){
4223                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4224                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4225                         }
4226                     }
4227                 }
4228
4229                 score *= lambda;
4230
4231                 unquant_change= new_coeff - old_coeff;
4232                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4233
4234                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4235                 if(score<best_score){
4236                     best_score= score;
4237                     best_coeff= i;
4238                     best_change= change;
4239                     best_unquant_change= unquant_change;
4240                 }
4241             }
4242             if(level){
4243                 prev_level= level + 64;
4244                 if(prev_level&(~127))
4245                     prev_level= 0;
4246                 prev_run= run;
4247                 run=0;
4248             }else{
4249                 run++;
4250             }
4251         }
4252 #ifdef REFINE_STATS
4253 STOP_TIMER("iterative step")}
4254 #endif
4255
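        /* Apply the best change found in this pass, update the residual and
           the run-length table, and keep iterating; otherwise terminate. */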
4256         if(best_change){
4257             int j= perm_scantable[ best_coeff ];
4258
4259             block[j] += best_change;
4260
4261             if(best_coeff > last_non_zero){
4262                 last_non_zero= best_coeff;
4263                 av_assert2(block[j]);
4264 #ifdef REFINE_STATS
4265 after_last++;
4266 #endif
4267             }else{
4268 #ifdef REFINE_STATS
4269 if(block[j]){
4270     if(block[j] - best_change){
4271         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4272             raise++;
4273         }else{
4274             lower++;
4275         }
4276     }else{
4277         from_zero++;
4278     }
4279 }else{
4280     to_zero++;
4281 }
4282 #endif
4283                 for(; last_non_zero>=start_i; last_non_zero--){
4284                     if(block[perm_scantable[last_non_zero]])
4285                         break;
4286                 }
4287             }
4288 #ifdef REFINE_STATS
4289 count++;
4290 if(256*256*256*64 % count == 0){
4291     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4292 }
4293 #endif
4294             run=0;
4295             rle_index=0;
4296             for(i=start_i; i<=last_non_zero; i++){
4297                 int j= perm_scantable[i];
4298                 const int level= block[j];
4299
4300                 if(level){
4301                     run_tab[rle_index++]=run;
4302                     run=0;
4303                 }else{
4304                     run++;
4305                 }
4306             }
4307
4308             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4309         }else{
4310             break;
4311         }
4312     }
4313 #ifdef REFINE_STATS
4314 if(last_non_zero>0){
4315 STOP_TIMER("iterative search")
4316 }
4317 }
4318 #endif
4319
4320     return last_non_zero;
4321 }
4322
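/* Plain scalar quantization of one 8x8 block: forward DCT, optional
 * DCT-domain denoising, then biased division by the quantization matrix.
 * Returns the index of the last nonzero coefficient in scan order and flags
 * *overflow when a level may exceed the codec's representable range. */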
4323 int ff_dct_quantize_c(MpegEncContext *s,
4324                         int16_t *block, int n,
4325                         int qscale, int *overflow)
4326 {
4327     int i, j, level, last_non_zero, q, start_i;
4328     const int *qmat;
4329     const uint8_t *scantable= s->intra_scantable.scantable;
4330     int bias;
4331     int max=0;
4332     unsigned int threshold1, threshold2;
4333
4334     s->dsp.fdct (block);
4335
4336     if(s->dct_error_sum)
4337         s->denoise_dct(s, block);
4338
4339     if (s->mb_intra) {
4340         if (!s->h263_aic) {
4341             if (n < 4)
4342                 q = s->y_dc_scale;
4343             else
4344                 q = s->c_dc_scale;
4345             q = q << 3;
4346         } else
4347             /* For AIC we skip quant/dequant of INTRADC */
4348             q = 1 << 3;
4349
4350         /* note: block[0] is assumed to be positive */
4351         block[0] = (block[0] + (q >> 1)) / q;
4352         start_i = 1;
4353         last_non_zero = 0;
4354         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4355         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4356     } else {
4357         start_i = 0;
4358         last_non_zero = -1;
4359         qmat = s->q_inter_matrix[qscale];
4360         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4361     }
4362     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4363     threshold2= (threshold1<<1);
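    /* Scan backwards for the last coefficient that survives the dead zone;
       everything beyond it is cleared without further work. */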
4364     for(i=63;i>=start_i;i--) {
4365         j = scantable[i];
4366         level = block[j] * qmat[j];
4367
4368         if(((unsigned)(level+threshold1))>threshold2){
4369             last_non_zero = i;
4370             break;
4371         }else{
4372             block[j]=0;
4373         }
4374     }
4375     for(i=start_i; i<=last_non_zero; i++) {
4376         j = scantable[i];
4377         level = block[j] * qmat[j];
4378
4379 //        if(   bias+level >= (1<<QMAT_SHIFT)
4380 //           || bias-level >= (1<<QMAT_SHIFT)){
4381         if(((unsigned)(level+threshold1))>threshold2){
4382             if(level>0){
4383                 level= (bias + level)>>QMAT_SHIFT;
4384                 block[j]= level;
4385             }else{
4386                 level= (bias - level)>>QMAT_SHIFT;
4387                 block[j]= -level;
4388             }
4389             max |=level;
4390         }else{
4391             block[j]=0;
4392         }
4393     }
4394     *overflow= s->max_qcoeff < max; //overflow might have happened
4395
4396     /* we need this permutation so that the IDCT is corrected; only the nonzero elements are permuted */
4397     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4398         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4399
4400     return last_non_zero;
4401 }
4402
4403 #define OFFSET(x) offsetof(MpegEncContext, x)
4404 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4405 static const AVOption h263_options[] = {
4406     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4407     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4408     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4409     FF_MPV_COMMON_OPTS
4410     { NULL },
4411 };
4412
4413 static const AVClass h263_class = {
4414     .class_name = "H.263 encoder",
4415     .item_name  = av_default_item_name,
4416     .option     = h263_options,
4417     .version    = LIBAVUTIL_VERSION_INT,
4418 };
4419
4420 AVCodec ff_h263_encoder = {
4421     .name           = "h263",
4422     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4423     .type           = AVMEDIA_TYPE_VIDEO,
4424     .id             = AV_CODEC_ID_H263,
4425     .priv_data_size = sizeof(MpegEncContext),
4426     .init           = ff_MPV_encode_init,
4427     .encode2        = ff_MPV_encode_picture,
4428     .close          = ff_MPV_encode_end,
4429     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4430     .priv_class     = &h263_class,
4431 };
4432
4433 static const AVOption h263p_options[] = {
4434     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4435     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4436     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4437     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4438     FF_MPV_COMMON_OPTS
4439     { NULL },
4440 };
4441 static const AVClass h263p_class = {
4442     .class_name = "H.263p encoder",
4443     .item_name  = av_default_item_name,
4444     .option     = h263p_options,
4445     .version    = LIBAVUTIL_VERSION_INT,
4446 };
4447
4448 AVCodec ff_h263p_encoder = {
4449     .name           = "h263p",
4450     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4451     .type           = AVMEDIA_TYPE_VIDEO,
4452     .id             = AV_CODEC_ID_H263P,
4453     .priv_data_size = sizeof(MpegEncContext),
4454     .init           = ff_MPV_encode_init,
4455     .encode2        = ff_MPV_encode_picture,
4456     .close          = ff_MPV_encode_end,
4457     .capabilities   = CODEC_CAP_SLICE_THREADS,
4458     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4459     .priv_class     = &h263p_class,
4460 };
4461
4462 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4463
4464 AVCodec ff_msmpeg4v2_encoder = {
4465     .name           = "msmpeg4v2",
4466     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4467     .type           = AVMEDIA_TYPE_VIDEO,
4468     .id             = AV_CODEC_ID_MSMPEG4V2,
4469     .priv_data_size = sizeof(MpegEncContext),
4470     .init           = ff_MPV_encode_init,
4471     .encode2        = ff_MPV_encode_picture,
4472     .close          = ff_MPV_encode_end,
4473     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4474     .priv_class     = &msmpeg4v2_class,
4475 };
4476
4477 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4478
4479 AVCodec ff_msmpeg4v3_encoder = {
4480     .name           = "msmpeg4",
4481     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4482     .type           = AVMEDIA_TYPE_VIDEO,
4483     .id             = AV_CODEC_ID_MSMPEG4V3,
4484     .priv_data_size = sizeof(MpegEncContext),
4485     .init           = ff_MPV_encode_init,
4486     .encode2        = ff_MPV_encode_picture,
4487     .close          = ff_MPV_encode_end,
4488     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4489     .priv_class     = &msmpeg4v3_class,
4490 };
4491
4492 FF_MPV_GENERIC_CLASS(wmv1)
4493
4494 AVCodec ff_wmv1_encoder = {
4495     .name           = "wmv1",
4496     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4497     .type           = AVMEDIA_TYPE_VIDEO,
4498     .id             = AV_CODEC_ID_WMV1,
4499     .priv_data_size = sizeof(MpegEncContext),
4500     .init           = ff_MPV_encode_init,
4501     .encode2        = ff_MPV_encode_picture,
4502     .close          = ff_MPV_encode_end,
4503     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4504     .priv_class     = &wmv1_class,
4505 };