git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of Libav.
   9  *
  10  * Libav is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * Libav is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with Libav; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "idctdsp.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "mpegvideodata.h"
  44 #include "h261.h"
  45 #include "h263.h"
  46 #include "h263data.h"
  47 #include "mjpegenc_common.h"
  48 #include "mathops.h"
  49 #include "mpegutils.h"
  50 #include "mjpegenc.h"
  51 #include "msmpeg4.h"
  52 #include "pixblockdsp.h"
  53 #include "qpeldsp.h"
  54 #include "faandct.h"
  55 #include "thread.h"
  56 #include "aandcttab.h"
  57 #include "flv.h"
  58 #include "mpeg4video.h"
  59 #include "internal.h"
  60 #include "bytestream.h"
  61 #include "wmv2.h"
  62 #include "rv10.h"
  63 #include <limits.h>
  64
  65 #define QUANT_BIAS_SHIFT 8
  66
  67 #define QMAT_SHIFT_MMX 16
  68 #define QMAT_SHIFT 22
  69
  70 static int encode_picture(MpegEncContext *s, int picture_number);
  71 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
  72 static int sse_mb(MpegEncContext *s);
  73 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
  74 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
  75
  76 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
  77 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
  78
  79 const AVOption ff_mpv_generic_options[] = {
  80     FF_MPV_COMMON_OPTS
  81     { NULL },
  82 };
  83
  84 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
  85                        uint16_t (*qmat16)[2][64],
  86                        const uint16_t *quant_matrix,
  87                        int bias, int qmin, int qmax, int intra)
  88 {
  89     FDCTDSPContext *fdsp = &s->fdsp;
  90     int qscale;
  91     int shift = 0;
  92
  93     for (qscale = qmin; qscale <= qmax; qscale++) {
  94         int i;
  95         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
  96 #if CONFIG_FAANDCT
  97             fdsp->fdct == ff_faandct            ||
  98 #endif /* CONFIG_FAANDCT */
  99             fdsp->fdct == ff_jpeg_fdct_islow_10) {
 100             for (i = 0; i < 64; i++) {
 101                 const int j = s->idsp.idct_permutation[i];
 102                 int64_t den = (int64_t) qscale * quant_matrix[j];
 103                 /* 16 <= qscale * quant_matrix[i] <= 7905
 104                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 105                  *             19952 <=              x  <= 249205026
 106                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 107                  *           3444240 >= (1 << 36) / (x) >= 275 */
 108
 109                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 110             }
 111         } else if (fdsp->fdct == ff_fdct_ifast) {
 112             for (i = 0; i < 64; i++) {
 113                 const int j = s->idsp.idct_permutation[i];
 114                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
 115                 /* 16 <= qscale * quant_matrix[i] <= 7905
 116                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 117                  *             19952 <=              x  <= 249205026
 118                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 119                  *           3444240 >= (1 << 36) / (x) >= 275 */
 120
 121                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
 122             }
 123         } else {
 124             for (i = 0; i < 64; i++) {
 125                 const int j = s->idsp.idct_permutation[i];
 126                 int64_t den = (int64_t) qscale * quant_matrix[j];
 127                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 128                  * Assume x = qscale * quant_matrix[i]
 129                  * So             16 <=              x  <= 7905
 130                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 131                  * so          32768 >= (1 << 19) / (x) >= 67 */
 132                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 133                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 134                 //                    (qscale * quant_matrix[i]);
 135                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
 136
 137                 if (qmat16[qscale][0][i] == 0 ||
 138                     qmat16[qscale][0][i] == 128 * 256)
 139                     qmat16[qscale][0][i] = 128 * 256 - 1;
 140                 qmat16[qscale][1][i] =
 141                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 142                                 qmat16[qscale][0][i]);
 143             }
 144         }
 145
 146         for (i = intra; i < 64; i++) {
 147             int64_t max = 8191;
 148             if (fdsp->fdct == ff_fdct_ifast) {
 149                 max = (8191LL * ff_aanscales[i]) >> 14;
 150             }
 151             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 152                 shift++;
 153             }
 154         }
 155     }
 156     if (shift) {
 157         av_log(NULL, AV_LOG_INFO,
 158                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 159                QMAT_SHIFT - shift);
 160     }
 161 }
 162
 163 static inline void update_qscale(MpegEncContext *s)
 164 {
 165     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 166                 (FF_LAMBDA_SHIFT + 7);
 167     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 168
 169     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 170                  FF_LAMBDA_SHIFT;
 171 }
 172
 173 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 174 {
 175     int i;
 176
 177     if (matrix) {
 178         put_bits(pb, 1, 1);
 179         for (i = 0; i < 64; i++) {
 180             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 181         }
 182     } else
 183         put_bits(pb, 1, 0);
 184 }
 185
 186 /**
 187  * init s->current_picture.qscale_table from s->lambda_table
 188  */
 189 void ff_init_qscale_tab(MpegEncContext *s)
 190 {
 191     int8_t * const qscale_table = s->current_picture.qscale_table;
 192     int i;
 193
 194     for (i = 0; i < s->mb_num; i++) {
 195         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 196         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 197         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 198                                                   s->avctx->qmax);
 199     }
 200 }
 201
 202 static void update_duplicate_context_after_me(MpegEncContext *dst,
 203                                               MpegEncContext *src)
 204 {
 205 #define COPY(a) dst->a= src->a
 206     COPY(pict_type);
 207     COPY(current_picture);
 208     COPY(f_code);
 209     COPY(b_code);
 210     COPY(qscale);
 211     COPY(lambda);
 212     COPY(lambda2);
 213     COPY(picture_in_gop_number);
 214     COPY(gop_picture_number);
 215     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 216     COPY(progressive_frame);    // FIXME don't set in encode_header
 217     COPY(partitioned_frame);    // FIXME don't set in encode_header
 218 #undef COPY
 219 }
 220
 221 /**
 222  * Set the given MpegEncContext to defaults for encoding.
 223  * the changed fields will not depend upon the prior state of the MpegEncContext.
 224  */
 225 static void mpv_encode_defaults(MpegEncContext *s)
 226 {
 227     int i;
 228     ff_mpv_common_defaults(s);
 229
 230     for (i = -16; i < 16; i++) {
 231         default_fcode_tab[i + MAX_MV] = 1;
 232     }
 233     s->me.mv_penalty = default_mv_penalty;
 234     s->fcode_tab     = default_fcode_tab;
 235
 236     s->input_picture_number  = 0;
 237     s->picture_in_gop_number = 0;
 238 }
 239
 240 /* init video encoder */
 241 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
 242 {
 243     MpegEncContext *s = avctx->priv_data;
 244     AVCPBProperties *cpb_props;
 245     int i, ret, format_supported;
 246
 247     mpv_encode_defaults(s);
 248
 249     switch (avctx->codec_id) {
 250     case AV_CODEC_ID_MPEG2VIDEO:
 251         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 252             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 253             av_log(avctx, AV_LOG_ERROR,
 254                    "only YUV420 and YUV422 are supported\n");
 255             return -1;
 256         }
 257         break;
 258     case AV_CODEC_ID_MJPEG:
 259         format_supported = 0;
 260         /* JPEG color space */
 261         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
 262             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
 263             (avctx->color_range == AVCOL_RANGE_JPEG &&
 264              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 265               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
 266             format_supported = 1;
 267         /* MPEG color space */
 268         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
 269                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 270                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
 271             format_supported = 1;
 272
 273         if (!format_supported) {
 274             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 275             return -1;
 276         }
 277         break;
 278     default:
 279         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 280             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 281             return -1;
 282         }
 283     }
 284
 285     switch (avctx->pix_fmt) {
 286     case AV_PIX_FMT_YUVJ422P:
 287     case AV_PIX_FMT_YUV422P:
 288         s->chroma_format = CHROMA_422;
 289         break;
 290     case AV_PIX_FMT_YUVJ420P:
 291     case AV_PIX_FMT_YUV420P:
 292     default:
 293         s->chroma_format = CHROMA_420;
 294         break;
 295     }
 296
 297     s->bit_rate = avctx->bit_rate;
 298     s->width    = avctx->width;
 299     s->height   = avctx->height;
 300     if (avctx->gop_size > 600 &&
 301         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 302         av_log(avctx, AV_LOG_ERROR,
 303                "Warning keyframe interval too large! reducing it ...\n");
 304         avctx->gop_size = 600;
 305     }
 306     s->gop_size     = avctx->gop_size;
 307     s->avctx        = avctx;
 308     if (avctx->max_b_frames > MAX_B_FRAMES) {
 309         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 310                "is %d.\n", MAX_B_FRAMES);
 311     }
 312     s->max_b_frames = avctx->max_b_frames;
 313     s->codec_id     = avctx->codec->id;
 314     s->strict_std_compliance = avctx->strict_std_compliance;
 315     s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
 316     s->mpeg_quant         = avctx->mpeg_quant;
 317     s->rtp_mode           = !!avctx->rtp_payload_size;
 318     s->intra_dc_precision = avctx->intra_dc_precision;
 319     s->user_specified_pts = AV_NOPTS_VALUE;
 320
 321     if (s->gop_size <= 1) {
 322         s->intra_only = 1;
 323         s->gop_size   = 12;
 324     } else {
 325         s->intra_only = 0;
 326     }
 327
 328 #if FF_API_MOTION_EST
 329 FF_DISABLE_DEPRECATION_WARNINGS
 330     s->me_method = avctx->me_method;
 331 FF_ENABLE_DEPRECATION_WARNINGS
 332 #endif
 333
 334     /* Fixed QSCALE */
 335     s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
 336
 337 #if FF_API_MPV_OPT
 338     FF_DISABLE_DEPRECATION_WARNINGS
 339     if (avctx->border_masking != 0.0)
 340         s->border_masking = avctx->border_masking;
 341     FF_ENABLE_DEPRECATION_WARNINGS
 342 #endif
 343
 344     s->adaptive_quant = (s->avctx->lumi_masking ||
 345                          s->avctx->dark_masking ||
 346                          s->avctx->temporal_cplx_masking ||
 347                          s->avctx->spatial_cplx_masking  ||
 348                          s->avctx->p_masking      ||
 349                          s->border_masking ||
 350                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 351                         !s->fixed_qscale;
 352
 353     s->loop_filter = !!(s->avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
 354
 355     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 356         av_log(avctx, AV_LOG_ERROR,
 357                "a vbv buffer size is needed, "
 358                "for encoding with a maximum bitrate\n");
 359         return -1;
 360     }
 361
 362     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 363         av_log(avctx, AV_LOG_INFO,
 364                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 365     }
 366
 367     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 368         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 369         return -1;
 370     }
 371
 372     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 373         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
 374         return -1;
 375     }
 376
 377     if (avctx->rc_max_rate &&
 378         avctx->rc_max_rate == avctx->bit_rate &&
 379         avctx->rc_max_rate != avctx->rc_min_rate) {
 380         av_log(avctx, AV_LOG_INFO,
 381                "impossible bitrate constraints, this will fail\n");
 382     }
 383
 384     if (avctx->rc_buffer_size &&
 385         avctx->bit_rate * (int64_t)avctx->time_base.num >
 386             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 387         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 388         return -1;
 389     }
 390
 391     if (!s->fixed_qscale &&
 392         avctx->bit_rate * av_q2d(avctx->time_base) >
 393             avctx->bit_rate_tolerance) {
 394         av_log(avctx, AV_LOG_ERROR,
 395                "bitrate tolerance too small for bitrate\n");
 396         return -1;
 397     }
 398
 399     if (s->avctx->rc_max_rate &&
 400         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 401         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 402          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 403         90000LL * (avctx->rc_buffer_size - 1) >
 404             s->avctx->rc_max_rate * 0xFFFFLL) {
 405         av_log(avctx, AV_LOG_INFO,
 406                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 407                "specified vbv buffer is too large for the given bitrate!\n");
 408     }
 409
 410     if ((s->avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
 411         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 412         s->codec_id != AV_CODEC_ID_FLV1) {
 413         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 414         return -1;
 415     }
 416
 417     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 418         av_log(avctx, AV_LOG_ERROR,
 419                "OBMC is only supported with simple mb decision\n");
 420         return -1;
 421     }
 422
 423     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 424         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 425         return -1;
 426     }
 427
 428     if (s->max_b_frames                    &&
 429         s->codec_id != AV_CODEC_ID_MPEG4      &&
 430         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 431         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 432         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 433         return -1;
 434     }
 435
 436     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 437          s->codec_id == AV_CODEC_ID_H263  ||
 438          s->codec_id == AV_CODEC_ID_H263P) &&
 439         (avctx->sample_aspect_ratio.num > 255 ||
 440          avctx->sample_aspect_ratio.den > 255)) {
 441         av_log(avctx, AV_LOG_ERROR,
 442                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
 443                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 444         return -1;
 445     }
 446
 447     if ((s->avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
 448         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 449         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 450         return -1;
 451     }
 452
 453     // FIXME mpeg2 uses that too
 454     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
 455         av_log(avctx, AV_LOG_ERROR,
 456                "mpeg2 style quantization not supported by codec\n");
 457         return -1;
 458     }
 459
 460     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 461         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 462         return -1;
 463     }
 464
 465     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 466         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 467         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 468         return -1;
 469     }
 470
 471     if (s->avctx->scenechange_threshold < 1000000000 &&
 472         (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
 473         av_log(avctx, AV_LOG_ERROR,
 474                "closed gop with scene change detection are not supported yet, "
 475                "set threshold to 1000000000\n");
 476         return -1;
 477     }
 478
 479     if (s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
 480         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 481             av_log(avctx, AV_LOG_ERROR,
 482                   "low delay forcing is only available for mpeg2\n");
 483             return -1;
 484         }
 485         if (s->max_b_frames != 0) {
 486             av_log(avctx, AV_LOG_ERROR,
 487                    "b frames cannot be used with low delay\n");
 488             return -1;
 489         }
 490     }
 491
 492     if (s->q_scale_type == 1) {
 493         if (avctx->qmax > 12) {
 494             av_log(avctx, AV_LOG_ERROR,
 495                    "non linear quant only supports qmax <= 12 currently\n");
 496             return -1;
 497         }
 498     }
 499
 500     if (avctx->slices > 1 &&
 501         (avctx->codec_id == AV_CODEC_ID_FLV1 || avctx->codec_id == AV_CODEC_ID_H261)) {
 502         av_log(avctx, AV_LOG_ERROR, "Multiple slices are not supported by this codec\n");
 503         return AVERROR(EINVAL);
 504     }
 505
 506     if (s->avctx->thread_count > 1         &&
 507         s->codec_id != AV_CODEC_ID_MPEG4      &&
 508         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 509         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 510         (s->codec_id != AV_CODEC_ID_H263P)) {
 511         av_log(avctx, AV_LOG_ERROR,
 512                "multi threaded encoding not supported by codec\n");
 513         return -1;
 514     }
 515
 516     if (s->avctx->thread_count < 1) {
 517         av_log(avctx, AV_LOG_ERROR,
 518                "automatic thread number detection not supported by codec,"
 519                "patch welcome\n");
 520         return -1;
 521     }
 522
 523     if (!avctx->time_base.den || !avctx->time_base.num) {
 524         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 525         return -1;
 526     }
 527
 528     if (avctx->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
 529         av_log(avctx, AV_LOG_INFO,
 530                "notice: b_frame_strategy only affects the first pass\n");
 531         avctx->b_frame_strategy = 0;
 532     }
 533
 534     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 535     if (i > 1) {
 536         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 537         avctx->time_base.den /= i;
 538         avctx->time_base.num /= i;
 539         //return -1;
 540     }
 541
 542     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 543         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
 544         // (a + x * 3 / 8) / x
 545         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 546         s->inter_quant_bias = 0;
 547     } else {
 548         s->intra_quant_bias = 0;
 549         // (a - x / 4) / x
 550         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 551     }
 552
 553 #if FF_API_QUANT_BIAS
 554 FF_DISABLE_DEPRECATION_WARNINGS
 555     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 556         s->intra_quant_bias = avctx->intra_quant_bias;
 557     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 558         s->inter_quant_bias = avctx->inter_quant_bias;
 559 FF_ENABLE_DEPRECATION_WARNINGS
 560 #endif
 561
 562     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 563         s->avctx->time_base.den > (1 << 16) - 1) {
 564         av_log(avctx, AV_LOG_ERROR,
 565                "timebase %d/%d not supported by MPEG 4 standard, "
 566                "the maximum admitted value for the timebase denominator "
 567                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 568                (1 << 16) - 1);
 569         return -1;
 570     }
 571     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 572
 573     switch (avctx->codec->id) {
 574     case AV_CODEC_ID_MPEG1VIDEO:
 575         s->out_format = FMT_MPEG1;
 576         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
 577         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 578         break;
 579     case AV_CODEC_ID_MPEG2VIDEO:
 580         s->out_format = FMT_MPEG1;
 581         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
 582         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 583         s->rtp_mode   = 1;
 584         break;
 585     case AV_CODEC_ID_MJPEG:
 586         s->out_format = FMT_MJPEG;
 587         s->intra_only = 1; /* force intra only for jpeg */
 588         if (!CONFIG_MJPEG_ENCODER ||
 589             ff_mjpeg_encode_init(s) < 0)
 590             return -1;
 591         avctx->delay = 0;
 592         s->low_delay = 1;
 593         break;
 594     case AV_CODEC_ID_H261:
 595         if (!CONFIG_H261_ENCODER)
 596             return -1;
 597         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 598             av_log(avctx, AV_LOG_ERROR,
 599                    "The specified picture size of %dx%d is not valid for the "
 600                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 601                     s->width, s->height);
 602             return -1;
 603         }
 604         s->out_format = FMT_H261;
 605         avctx->delay  = 0;
 606         s->low_delay  = 1;
 607         s->rtp_mode   = 0; /* Sliced encoding not supported */
 608         break;
 609     case AV_CODEC_ID_H263:
 610         if (!CONFIG_H263_ENCODER)
 611         return -1;
 612         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 613                              s->width, s->height) == 8) {
 614             av_log(avctx, AV_LOG_INFO,
 615                    "The specified picture size of %dx%d is not valid for "
 616                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 617                    "352x288, 704x576, and 1408x1152."
 618                    "Try H.263+.\n", s->width, s->height);
 619             return -1;
 620         }
 621         s->out_format = FMT_H263;
 622         avctx->delay  = 0;
 623         s->low_delay  = 1;
 624         break;
 625     case AV_CODEC_ID_H263P:
 626         s->out_format = FMT_H263;
 627         s->h263_plus  = 1;
 628         /* Fx */
 629         s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
 630         s->modified_quant  = s->h263_aic;
 631         s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 632         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 633
 634         /* /Fx */
 635         /* These are just to be sure */
 636         avctx->delay = 0;
 637         s->low_delay = 1;
 638         break;
 639     case AV_CODEC_ID_FLV1:
 640         s->out_format      = FMT_H263;
 641         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 642         s->unrestricted_mv = 1;
 643         s->rtp_mode  = 0; /* don't allow GOB */
 644         avctx->delay = 0;
 645         s->low_delay = 1;
 646         break;
 647     case AV_CODEC_ID_RV10:
 648         s->out_format = FMT_H263;
 649         avctx->delay  = 0;
 650         s->low_delay  = 1;
 651         break;
 652     case AV_CODEC_ID_RV20:
 653         s->out_format      = FMT_H263;
 654         avctx->delay       = 0;
 655         s->low_delay       = 1;
 656         s->modified_quant  = 1;
 657         s->h263_aic        = 1;
 658         s->h263_plus       = 1;
 659         s->loop_filter     = 1;
 660         s->unrestricted_mv = 0;
 661         break;
 662     case AV_CODEC_ID_MPEG4:
 663         s->out_format      = FMT_H263;
 664         s->h263_pred       = 1;
 665         s->unrestricted_mv = 1;
 666         s->low_delay       = s->max_b_frames ? 0 : 1;
 667         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 668         break;
 669     case AV_CODEC_ID_MSMPEG4V2:
 670         s->out_format      = FMT_H263;
 671         s->h263_pred       = 1;
 672         s->unrestricted_mv = 1;
 673         s->msmpeg4_version = 2;
 674         avctx->delay       = 0;
 675         s->low_delay       = 1;
 676         break;
 677     case AV_CODEC_ID_MSMPEG4V3:
 678         s->out_format        = FMT_H263;
 679         s->h263_pred         = 1;
 680         s->unrestricted_mv   = 1;
 681         s->msmpeg4_version   = 3;
 682         s->flipflop_rounding = 1;
 683         avctx->delay         = 0;
 684         s->low_delay         = 1;
 685         break;
 686     case AV_CODEC_ID_WMV1:
 687         s->out_format        = FMT_H263;
 688         s->h263_pred         = 1;
 689         s->unrestricted_mv   = 1;
 690         s->msmpeg4_version   = 4;
 691         s->flipflop_rounding = 1;
 692         avctx->delay         = 0;
 693         s->low_delay         = 1;
 694         break;
 695     case AV_CODEC_ID_WMV2:
 696         s->out_format        = FMT_H263;
 697         s->h263_pred         = 1;
 698         s->unrestricted_mv   = 1;
 699         s->msmpeg4_version   = 5;
 700         s->flipflop_rounding = 1;
 701         avctx->delay         = 0;
 702         s->low_delay         = 1;
 703         break;
 704     default:
 705         return -1;
 706     }
 707
 708     avctx->has_b_frames = !s->low_delay;
 709
 710     s->encoding = 1;
 711
 712     s->progressive_frame    =
 713     s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
 714                                                 AV_CODEC_FLAG_INTERLACED_ME) ||
 715                                 s->alternate_scan);
 716
 717     /* init */
 718     ff_mpv_idct_init(s);
 719     if (ff_mpv_common_init(s) < 0)
 720         return -1;
 721
 722     if (ARCH_X86)
 723         ff_mpv_encode_init_x86(s);
 724
 725     ff_fdctdsp_init(&s->fdsp, avctx);
 726     ff_me_cmp_init(&s->mecc, avctx);
 727     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 728     ff_pixblockdsp_init(&s->pdsp, avctx);
 729     ff_qpeldsp_init(&s->qdsp);
 730
 731     if (s->msmpeg4_version) {
 732         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 733                           2 * 2 * (MAX_LEVEL + 1) *
 734                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 735     }
 736     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 737
 738     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 739     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 740     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 741     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 742     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 743                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 744     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 745                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 746
 747     if (s->avctx->noise_reduction) {
 748         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 749                           2 * 64 * sizeof(uint16_t), fail);
 750     }
 751
 752     if (CONFIG_H263_ENCODER)
 753         ff_h263dsp_init(&s->h263dsp);
 754     if (!s->dct_quantize)
 755         s->dct_quantize = ff_dct_quantize_c;
 756     if (!s->denoise_dct)
 757         s->denoise_dct  = denoise_dct_c;
 758     s->fast_dct_quantize = s->dct_quantize;
 759     if (avctx->trellis)
 760         s->dct_quantize  = dct_quantize_trellis_c;
 761
 762     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 763         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 764
 765     if (s->slice_context_count > 1) {
 766         s->rtp_mode = 1;
 767
 768         if (avctx->codec_id == AV_CODEC_ID_H263 || avctx->codec_id == AV_CODEC_ID_H263P)
 769             s->h263_slice_structured = 1;
 770     }
 771
 772     s->quant_precision = 5;
 773
 774     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
 775     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
 776
 777     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 778         ff_h261_encode_init(s);
 779     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 780         ff_h263_encode_init(s);
 781     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 782         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
 783             return ret;
 784     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 785         && s->out_format == FMT_MPEG1)
 786         ff_mpeg1_encode_init(s);
 787
 788     /* init q matrix */
 789     for (i = 0; i < 64; i++) {
 790         int j = s->idsp.idct_permutation[i];
 791         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 792             s->mpeg_quant) {
 793             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 794             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 795         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 796             s->intra_matrix[j] =
 797             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 798         } else {
 799             /* mpeg1/2 */
 800             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 801             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 802         }
 803         if (s->avctx->intra_matrix)
 804             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 805         if (s->avctx->inter_matrix)
 806             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 807     }
 808
 809     /* precompute matrix */
 810     /* for mjpeg, we do include qscale in the matrix */
 811     if (s->out_format != FMT_MJPEG) {
 812         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
 813                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 814                           31, 1);
 815         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
 816                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 817                           31, 0);
 818     }
 819
 820     if (ff_rate_control_init(s) < 0)
 821         return -1;
 822
 823 #if FF_API_ERROR_RATE
 824     FF_DISABLE_DEPRECATION_WARNINGS
 825     if (avctx->error_rate)
 826         s->error_rate = avctx->error_rate;
 827     FF_ENABLE_DEPRECATION_WARNINGS;
 828 #endif
 829
 830 #if FF_API_NORMALIZE_AQP
 831     FF_DISABLE_DEPRECATION_WARNINGS
 832     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 833         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 834     FF_ENABLE_DEPRECATION_WARNINGS;
 835 #endif
 836
 837 #if FF_API_MV0
 838     FF_DISABLE_DEPRECATION_WARNINGS
 839     if (avctx->flags & CODEC_FLAG_MV0)
 840         s->mpv_flags |= FF_MPV_FLAG_MV0;
 841     FF_ENABLE_DEPRECATION_WARNINGS
 842 #endif
 843
 844 #if FF_API_MPV_OPT
 845     FF_DISABLE_DEPRECATION_WARNINGS
 846     if (avctx->rc_qsquish != 0.0)
 847         s->rc_qsquish = avctx->rc_qsquish;
 848     if (avctx->rc_qmod_amp != 0.0)
 849         s->rc_qmod_amp = avctx->rc_qmod_amp;
 850     if (avctx->rc_qmod_freq)
 851         s->rc_qmod_freq = avctx->rc_qmod_freq;
 852     if (avctx->rc_buffer_aggressivity != 1.0)
 853         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
 854     if (avctx->rc_initial_cplx != 0.0)
 855         s->rc_initial_cplx = avctx->rc_initial_cplx;
 856     if (avctx->lmin)
 857         s->lmin = avctx->lmin;
 858     if (avctx->lmax)
 859         s->lmax = avctx->lmax;
 860
 861     if (avctx->rc_eq) {
 862         av_freep(&s->rc_eq);
 863         s->rc_eq = av_strdup(avctx->rc_eq);
 864         if (!s->rc_eq)
 865             return AVERROR(ENOMEM);
 866     }
 867     FF_ENABLE_DEPRECATION_WARNINGS
 868 #endif
 869
 870     if (avctx->b_frame_strategy == 2) {
 871         for (i = 0; i < s->max_b_frames + 2; i++) {
 872             s->tmp_frames[i] = av_frame_alloc();
 873             if (!s->tmp_frames[i])
 874                 return AVERROR(ENOMEM);
 875
 876             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 877             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
 878             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
 879
 880             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 881             if (ret < 0)
 882                 return ret;
 883         }
 884     }
 885
 886     cpb_props = ff_add_cpb_side_data(avctx);
 887     if (!cpb_props)
 888         return AVERROR(ENOMEM);
 889     cpb_props->max_bitrate = avctx->rc_max_rate;
 890     cpb_props->min_bitrate = avctx->rc_min_rate;
 891     cpb_props->avg_bitrate = avctx->bit_rate;
 892     cpb_props->buffer_size = avctx->rc_buffer_size;
 893
 894     return 0;
 895 fail:
 896     ff_mpv_encode_end(avctx);
 897     return AVERROR_UNKNOWN;
 898 }
 899
 900 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
 901 {
 902     MpegEncContext *s = avctx->priv_data;
 903     int i;
 904
 905     ff_rate_control_uninit(s);
 906
 907     ff_mpv_common_end(s);
 908     if (CONFIG_MJPEG_ENCODER &&
 909         s->out_format == FMT_MJPEG)
 910         ff_mjpeg_encode_close(s);
 911
 912     av_freep(&avctx->extradata);
 913
 914     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
 915         av_frame_free(&s->tmp_frames[i]);
 916
 917     ff_free_picture_tables(&s->new_picture);
 918     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
 919
 920     av_freep(&s->avctx->stats_out);
 921     av_freep(&s->ac_stats);
 922
 923     av_freep(&s->q_intra_matrix);
 924     av_freep(&s->q_inter_matrix);
 925     av_freep(&s->q_intra_matrix16);
 926     av_freep(&s->q_inter_matrix16);
 927     av_freep(&s->input_picture);
 928     av_freep(&s->reordered_input_picture);
 929     av_freep(&s->dct_offset);
 930
 931     return 0;
 932 }
 933
 934 static int get_sae(uint8_t *src, int ref, int stride)
 935 {
 936     int x,y;
 937     int acc = 0;
 938
 939     for (y = 0; y < 16; y++) {
 940         for (x = 0; x < 16; x++) {
 941             acc += FFABS(src[x + y * stride] - ref);
 942         }
 943     }
 944
 945     return acc;
 946 }
 947
 948 static int get_intra_count(MpegEncContext *s, uint8_t *src,
 949                            uint8_t *ref, int stride)
 950 {
 951     int x, y, w, h;
 952     int acc = 0;
 953
 954     w = s->width  & ~15;
 955     h = s->height & ~15;
 956
 957     for (y = 0; y < h; y += 16) {
 958         for (x = 0; x < w; x += 16) {
 959             int offset = x + y * stride;
 960             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
 961                                       stride, 16);
 962             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
 963             int sae  = get_sae(src + offset, mean, stride);
 964
 965             acc += sae + 500 < sad;
 966         }
 967     }
 968     return acc;
 969 }
 970
 971 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
 972 {
 973     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
 974                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
 975                             s->mb_stride, s->mb_height, s->b8_stride,
 976                             &s->linesize, &s->uvlinesize);
 977 }
 978
 979 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
 980 {
 981     Picture *pic = NULL;
 982     int64_t pts;
 983     int i, display_picture_number = 0, ret;
 984     int encoding_delay = s->max_b_frames ? s->max_b_frames
 985                                          : (s->low_delay ? 0 : 1);
 986     int flush_offset = 1;
 987     int direct = 1;
 988
 989     if (pic_arg) {
 990         pts = pic_arg->pts;
 991         display_picture_number = s->input_picture_number++;
 992
 993         if (pts != AV_NOPTS_VALUE) {
 994             if (s->user_specified_pts != AV_NOPTS_VALUE) {
 995                 int64_t time = pts;
 996                 int64_t last = s->user_specified_pts;
 997
 998                 if (time <= last) {
 999                     av_log(s->avctx, AV_LOG_ERROR,
1000                            "Error, Invalid timestamp=%"PRId64", "
1001                            "last=%"PRId64"\n", pts, s->user_specified_pts);
1002                     return -1;
1003                 }
1004
1005                 if (!s->low_delay && display_picture_number == 1)
1006                     s->dts_delta = time - last;
1007             }
1008             s->user_specified_pts = pts;
1009         } else {
1010             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1011                 s->user_specified_pts =
1012                 pts = s->user_specified_pts + 1;
1013                 av_log(s->avctx, AV_LOG_INFO,
1014                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1015                        pts);
1016             } else {
1017                 pts = display_picture_number;
1018             }
1019         }
1020
1021         if (!pic_arg->buf[0] ||
1022             pic_arg->linesize[0] != s->linesize ||
1023             pic_arg->linesize[1] != s->uvlinesize ||
1024             pic_arg->linesize[2] != s->uvlinesize)
1025             direct = 0;
1026         if ((s->width & 15) || (s->height & 15))
1027             direct = 0;
1028
1029         ff_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
1030                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1031
1032         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1033         if (i < 0)
1034             return i;
1035
1036         pic = &s->picture[i];
1037         pic->reference = 3;
1038
1039         if (direct) {
1040             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1041                 return ret;
1042         }
1043         ret = alloc_picture(s, pic, direct);
1044         if (ret < 0)
1045             return ret;
1046
1047         if (!direct) {
1048             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1049                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1050                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1051                 // empty
1052             } else {
1053                 int h_chroma_shift, v_chroma_shift;
1054                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1055                                                  &h_chroma_shift,
1056                                                  &v_chroma_shift);
1057
1058                 for (i = 0; i < 3; i++) {
1059                     int src_stride = pic_arg->linesize[i];
1060                     int dst_stride = i ? s->uvlinesize : s->linesize;
1061                     int h_shift = i ? h_chroma_shift : 0;
1062                     int v_shift = i ? v_chroma_shift : 0;
1063                     int w = s->width  >> h_shift;
1064                     int h = s->height >> v_shift;
1065                     uint8_t *src = pic_arg->data[i];
1066                     uint8_t *dst = pic->f->data[i];
1067
1068                     if (!s->avctx->rc_buffer_size)
1069                         dst += INPLACE_OFFSET;
1070
1071                     if (src_stride == dst_stride)
1072                         memcpy(dst, src, src_stride * h);
1073                     else {
1074                         int h2 = h;
1075                         uint8_t *dst2 = dst;
1076                         while (h2--) {
1077                             memcpy(dst2, src, w);
1078                             dst2 += dst_stride;
1079                             src += src_stride;
1080                         }
1081                     }
1082                     if ((s->width & 15) || (s->height & 15)) {
1083                         s->mpvencdsp.draw_edges(dst, dst_stride,
1084                                                 w, h,
1085                                                 16 >> h_shift,
1086                                                 16 >> v_shift,
1087                                                 EDGE_BOTTOM);
1088                     }
1089                 }
1090             }
1091         }
1092         ret = av_frame_copy_props(pic->f, pic_arg);
1093         if (ret < 0)
1094             return ret;
1095
1096         pic->f->display_picture_number = display_picture_number;
1097         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1098     } else {
1099         /* Flushing: When we have not received enough input frames,
1100          * ensure s->input_picture[0] contains the first picture */
1101         for (flush_offset = 0; flush_offset < encoding_delay + 1; flush_offset++)
1102             if (s->input_picture[flush_offset])
1103                 break;
1104
1105         if (flush_offset <= 1)
1106             flush_offset = 1;
1107         else
1108             encoding_delay = encoding_delay - flush_offset + 1;
1109     }
1110
1111     /* shift buffer entries */
1112     for (i = flush_offset; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1113         s->input_picture[i - flush_offset] = s->input_picture[i];
1114
1115     s->input_picture[encoding_delay] = (Picture*) pic;
1116
1117     return 0;
1118 }
1119
1120 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1121 {
1122     int x, y, plane;
1123     int score = 0;
1124     int64_t score64 = 0;
1125
1126     for (plane = 0; plane < 3; plane++) {
1127         const int stride = p->f->linesize[plane];
1128         const int bw = plane ? 1 : 2;
1129         for (y = 0; y < s->mb_height * bw; y++) {
1130             for (x = 0; x < s->mb_width * bw; x++) {
1131                 int off = p->shared ? 0 : 16;
1132                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1133                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1134                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1135
1136                 switch (s->avctx->frame_skip_exp) {
1137                 case 0: score    =  FFMAX(score, v);          break;
1138                 case 1: score   += FFABS(v);                  break;
1139                 case 2: score   += v * v;                     break;
1140                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1141                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1142                 }
1143             }
1144         }
1145     }
1146
1147     if (score)
1148         score64 = score;
1149
1150     if (score64 < s->avctx->frame_skip_threshold)
1151         return 1;
1152     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1153         return 1;
1154     return 0;
1155 }
1156
1157 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1158 {
1159     AVPacket pkt = { 0 };
1160     int ret, got_output;
1161
1162     av_init_packet(&pkt);
1163     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1164     if (ret < 0)
1165         return ret;
1166
1167     ret = pkt.size;
1168     av_packet_unref(&pkt);
1169     return ret;
1170 }
1171
1172 static int estimate_best_b_count(MpegEncContext *s)
1173 {
1174     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1175     AVCodecContext *c = avcodec_alloc_context3(NULL);
1176     const int scale = s->avctx->brd_scale;
1177     int i, j, out_size, p_lambda, b_lambda, lambda2;
1178     int64_t best_rd  = INT64_MAX;
1179     int best_b_count = -1;
1180
1181     if (!c)
1182         return AVERROR(ENOMEM);
1183     assert(scale >= 0 && scale <= 3);
1184
1185     //emms_c();
1186     //s->next_picture_ptr->quality;
1187     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1188     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1189     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1190     if (!b_lambda) // FIXME we should do this somewhere else
1191         b_lambda = p_lambda;
1192     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1193                FF_LAMBDA_SHIFT;
1194
1195     c->width        = s->width  >> scale;
1196     c->height       = s->height >> scale;
1197     c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1198     c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1199     c->mb_decision  = s->avctx->mb_decision;
1200     c->me_cmp       = s->avctx->me_cmp;
1201     c->mb_cmp       = s->avctx->mb_cmp;
1202     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1203     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1204     c->time_base    = s->avctx->time_base;
1205     c->max_b_frames = s->max_b_frames;
1206
1207     if (avcodec_open2(c, codec, NULL) < 0)
1208         return -1;
1209
1210     for (i = 0; i < s->max_b_frames + 2; i++) {
1211         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1212                                                 s->next_picture_ptr;
1213
1214         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1215             pre_input = *pre_input_ptr;
1216
1217             if (!pre_input.shared && i) {
1218                 pre_input.f->data[0] += INPLACE_OFFSET;
1219                 pre_input.f->data[1] += INPLACE_OFFSET;
1220                 pre_input.f->data[2] += INPLACE_OFFSET;
1221             }
1222
1223             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1224                                        s->tmp_frames[i]->linesize[0],
1225                                        pre_input.f->data[0],
1226                                        pre_input.f->linesize[0],
1227                                        c->width, c->height);
1228             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1229                                        s->tmp_frames[i]->linesize[1],
1230                                        pre_input.f->data[1],
1231                                        pre_input.f->linesize[1],
1232                                        c->width >> 1, c->height >> 1);
1233             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1234                                        s->tmp_frames[i]->linesize[2],
1235                                        pre_input.f->data[2],
1236                                        pre_input.f->linesize[2],
1237                                        c->width >> 1, c->height >> 1);
1238         }
1239     }
1240
1241     for (j = 0; j < s->max_b_frames + 1; j++) {
1242         int64_t rd = 0;
1243
1244         if (!s->input_picture[j])
1245             break;
1246
1247         c->error[0] = c->error[1] = c->error[2] = 0;
1248
1249         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1250         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1251
1252         out_size = encode_frame(c, s->tmp_frames[0]);
1253
1254         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1255
1256         for (i = 0; i < s->max_b_frames + 1; i++) {
1257             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1258
1259             s->tmp_frames[i + 1]->pict_type = is_p ?
1260                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1261             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1262
1263             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1264
1265             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1266         }
1267
1268         /* get the delayed frames */
1269         while (out_size) {
1270             out_size = encode_frame(c, NULL);
1271             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1272         }
1273
1274         rd += c->error[0] + c->error[1] + c->error[2];
1275
1276         if (rd < best_rd) {
1277             best_rd = rd;
1278             best_b_count = j;
1279         }
1280     }
1281
1282     avcodec_close(c);
1283     av_freep(&c);
1284
1285     return best_b_count;
1286 }
1287
1288 static int select_input_picture(MpegEncContext *s)
1289 {
1290     int i, ret;
1291
1292     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1293         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1294     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1295
1296     /* set next picture type & ordering */
1297     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1298         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1299             !s->next_picture_ptr || s->intra_only) {
1300             s->reordered_input_picture[0] = s->input_picture[0];
1301             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1302             s->reordered_input_picture[0]->f->coded_picture_number =
1303                 s->coded_picture_number++;
1304         } else {
1305             int b_frames;
1306
1307             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1308                 if (s->picture_in_gop_number < s->gop_size &&
1309                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1310                     // FIXME check that te gop check above is +-1 correct
1311                     av_frame_unref(s->input_picture[0]->f);
1312
1313                     emms_c();
1314                     ff_vbv_update(s, 0);
1315
1316                     goto no_output_pic;
1317                 }
1318             }
1319
1320             if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1321                 for (i = 0; i < s->max_b_frames + 1; i++) {
1322                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1323
1324                     if (pict_num >= s->rc_context.num_entries)
1325                         break;
1326                     if (!s->input_picture[i]) {
1327                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1328                         break;
1329                     }
1330
1331                     s->input_picture[i]->f->pict_type =
1332                         s->rc_context.entry[pict_num].new_pict_type;
1333                 }
1334             }
1335
1336             if (s->avctx->b_frame_strategy == 0) {
1337                 b_frames = s->max_b_frames;
1338                 while (b_frames && !s->input_picture[b_frames])
1339                     b_frames--;
1340             } else if (s->avctx->b_frame_strategy == 1) {
1341                 for (i = 1; i < s->max_b_frames + 1; i++) {
1342                     if (s->input_picture[i] &&
1343                         s->input_picture[i]->b_frame_score == 0) {
1344                         s->input_picture[i]->b_frame_score =
1345                             get_intra_count(s,
1346                                             s->input_picture[i    ]->f->data[0],
1347                                             s->input_picture[i - 1]->f->data[0],
1348                                             s->linesize) + 1;
1349                     }
1350                 }
1351                 for (i = 0; i < s->max_b_frames + 1; i++) {
1352                     if (!s->input_picture[i] ||
1353                         s->input_picture[i]->b_frame_score - 1 >
1354                             s->mb_num / s->avctx->b_sensitivity)
1355                         break;
1356                 }
1357
1358                 b_frames = FFMAX(0, i - 1);
1359
1360                 /* reset scores */
1361                 for (i = 0; i < b_frames + 1; i++) {
1362                     s->input_picture[i]->b_frame_score = 0;
1363                 }
1364             } else if (s->avctx->b_frame_strategy == 2) {
1365                 b_frames = estimate_best_b_count(s);
1366             } else {
1367                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1368                 b_frames = 0;
1369             }
1370
1371             emms_c();
1372
1373             for (i = b_frames - 1; i >= 0; i--) {
1374                 int type = s->input_picture[i]->f->pict_type;
1375                 if (type && type != AV_PICTURE_TYPE_B)
1376                     b_frames = i;
1377             }
1378             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1379                 b_frames == s->max_b_frames) {
1380                 av_log(s->avctx, AV_LOG_ERROR,
1381                        "warning, too many b frames in a row\n");
1382             }
1383
1384             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1385                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1386                     s->gop_size > s->picture_in_gop_number) {
1387                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1388                 } else {
1389                     if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1390                         b_frames = 0;
1391                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1392                 }
1393             }
1394
1395             if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1396                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1397                 b_frames--;
1398
1399             s->reordered_input_picture[0] = s->input_picture[b_frames];
1400             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1401                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1402             s->reordered_input_picture[0]->f->coded_picture_number =
1403                 s->coded_picture_number++;
1404             for (i = 0; i < b_frames; i++) {
1405                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1406                 s->reordered_input_picture[i + 1]->f->pict_type =
1407                     AV_PICTURE_TYPE_B;
1408                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1409                     s->coded_picture_number++;
1410             }
1411         }
1412     }
1413 no_output_pic:
1414     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1415
1416     if (s->reordered_input_picture[0]) {
1417         s->reordered_input_picture[0]->reference =
1418            s->reordered_input_picture[0]->f->pict_type !=
1419                AV_PICTURE_TYPE_B ? 3 : 0;
1420
1421         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1422             return ret;
1423
1424         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1425             // input is a shared pix, so we can't modifiy it -> alloc a new
1426             // one & ensure that the shared one is reuseable
1427
1428             Picture *pic;
1429             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1430             if (i < 0)
1431                 return i;
1432             pic = &s->picture[i];
1433
1434             pic->reference = s->reordered_input_picture[0]->reference;
1435             if (alloc_picture(s, pic, 0) < 0) {
1436                 return -1;
1437             }
1438
1439             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1440             if (ret < 0)
1441                 return ret;
1442
1443             /* mark us unused / free shared pic */
1444             av_frame_unref(s->reordered_input_picture[0]->f);
1445             s->reordered_input_picture[0]->shared = 0;
1446
1447             s->current_picture_ptr = pic;
1448         } else {
1449             // input is not a shared pix -> reuse buffer for current_pix
1450             s->current_picture_ptr = s->reordered_input_picture[0];
1451             for (i = 0; i < 4; i++) {
1452                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1453             }
1454         }
1455         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1456         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1457                                        s->current_picture_ptr)) < 0)
1458             return ret;
1459
1460         s->picture_number = s->new_picture.f->display_picture_number;
1461     }
1462     return 0;
1463 }
1464
1465 static void frame_end(MpegEncContext *s)
1466 {
1467     int i;
1468
1469     if (s->unrestricted_mv &&
1470         s->current_picture.reference &&
1471         !s->intra_only) {
1472         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1473         int hshift = desc->log2_chroma_w;
1474         int vshift = desc->log2_chroma_h;
1475         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1476                                 s->h_edge_pos, s->v_edge_pos,
1477                                 EDGE_WIDTH, EDGE_WIDTH,
1478                                 EDGE_TOP | EDGE_BOTTOM);
1479         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1480                                 s->h_edge_pos >> hshift,
1481                                 s->v_edge_pos >> vshift,
1482                                 EDGE_WIDTH >> hshift,
1483                                 EDGE_WIDTH >> vshift,
1484                                 EDGE_TOP | EDGE_BOTTOM);
1485         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1486                                 s->h_edge_pos >> hshift,
1487                                 s->v_edge_pos >> vshift,
1488                                 EDGE_WIDTH >> hshift,
1489                                 EDGE_WIDTH >> vshift,
1490                                 EDGE_TOP | EDGE_BOTTOM);
1491     }
1492
1493     emms_c();
1494
1495     s->last_pict_type                 = s->pict_type;
1496     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1497     if (s->pict_type!= AV_PICTURE_TYPE_B)
1498         s->last_non_b_pict_type = s->pict_type;
1499
1500     if (s->encoding) {
1501         /* release non-reference frames */
1502         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1503             if (!s->picture[i].reference)
1504                 ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1505         }
1506     }
1507
1508 #if FF_API_CODED_FRAME
1509 FF_DISABLE_DEPRECATION_WARNINGS
1510     av_frame_copy_props(s->avctx->coded_frame, s->current_picture.f);
1511 FF_ENABLE_DEPRECATION_WARNINGS
1512 #endif
1513 #if FF_API_ERROR_FRAME
1514 FF_DISABLE_DEPRECATION_WARNINGS
1515     memcpy(s->current_picture.f->error, s->current_picture.encoding_error,
1516            sizeof(s->current_picture.encoding_error));
1517 FF_ENABLE_DEPRECATION_WARNINGS
1518 #endif
1519 }
1520
1521 static void update_noise_reduction(MpegEncContext *s)
1522 {
1523     int intra, i;
1524
1525     for (intra = 0; intra < 2; intra++) {
1526         if (s->dct_count[intra] > (1 << 16)) {
1527             for (i = 0; i < 64; i++) {
1528                 s->dct_error_sum[intra][i] >>= 1;
1529             }
1530             s->dct_count[intra] >>= 1;
1531         }
1532
1533         for (i = 0; i < 64; i++) {
1534             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1535                                        s->dct_count[intra] +
1536                                        s->dct_error_sum[intra][i] / 2) /
1537                                       (s->dct_error_sum[intra][i] + 1);
1538         }
1539     }
1540 }
1541
1542 static int frame_start(MpegEncContext *s)
1543 {
1544     int ret;
1545
1546     /* mark & release old frames */
1547     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1548         s->last_picture_ptr != s->next_picture_ptr &&
1549         s->last_picture_ptr->f->buf[0]) {
1550         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1551     }
1552
1553     s->current_picture_ptr->f->pict_type = s->pict_type;
1554     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1555
1556     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1557     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1558                                    s->current_picture_ptr)) < 0)
1559         return ret;
1560
1561     if (s->pict_type != AV_PICTURE_TYPE_B) {
1562         s->last_picture_ptr = s->next_picture_ptr;
1563         if (!s->droppable)
1564             s->next_picture_ptr = s->current_picture_ptr;
1565     }
1566
1567     if (s->last_picture_ptr) {
1568         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1569         if (s->last_picture_ptr->f->buf[0] &&
1570             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1571                                        s->last_picture_ptr)) < 0)
1572             return ret;
1573     }
1574     if (s->next_picture_ptr) {
1575         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1576         if (s->next_picture_ptr->f->buf[0] &&
1577             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1578                                        s->next_picture_ptr)) < 0)
1579             return ret;
1580     }
1581
1582     if (s->picture_structure!= PICT_FRAME) {
1583         int i;
1584         for (i = 0; i < 4; i++) {
1585             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1586                 s->current_picture.f->data[i] +=
1587                     s->current_picture.f->linesize[i];
1588             }
1589             s->current_picture.f->linesize[i] *= 2;
1590             s->last_picture.f->linesize[i]    *= 2;
1591             s->next_picture.f->linesize[i]    *= 2;
1592         }
1593     }
1594
1595     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1596         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1597         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1598     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1599         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1600         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1601     } else {
1602         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1603         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1604     }
1605
1606     if (s->dct_error_sum) {
1607         assert(s->avctx->noise_reduction && s->encoding);
1608         update_noise_reduction(s);
1609     }
1610
1611     return 0;
1612 }
1613
1614 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1615                           const AVFrame *pic_arg, int *got_packet)
1616 {
1617     MpegEncContext *s = avctx->priv_data;
1618     int i, stuffing_count, ret;
1619     int context_count = s->slice_context_count;
1620
1621     s->picture_in_gop_number++;
1622
1623     if (load_input_picture(s, pic_arg) < 0)
1624         return -1;
1625
1626     if (select_input_picture(s) < 0) {
1627         return -1;
1628     }
1629
1630     /* output? */
1631     if (s->new_picture.f->data[0]) {
1632         uint8_t *sd;
1633         if (!pkt->data &&
1634             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1635             return ret;
1636         if (s->mb_info) {
1637             s->mb_info_ptr = av_packet_new_side_data(pkt,
1638                                  AV_PKT_DATA_H263_MB_INFO,
1639                                  s->mb_width*s->mb_height*12);
1640             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1641         }
1642
1643         for (i = 0; i < context_count; i++) {
1644             int start_y = s->thread_context[i]->start_mb_y;
1645             int   end_y = s->thread_context[i]->  end_mb_y;
1646             int h       = s->mb_height;
1647             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1648             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1649
1650             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1651         }
1652
1653         s->pict_type = s->new_picture.f->pict_type;
1654         //emms_c();
1655         ret = frame_start(s);
1656         if (ret < 0)
1657             return ret;
1658 vbv_retry:
1659         if (encode_picture(s, s->picture_number) < 0)
1660             return -1;
1661
1662 #if FF_API_STAT_BITS
1663 FF_DISABLE_DEPRECATION_WARNINGS
1664         avctx->header_bits = s->header_bits;
1665         avctx->mv_bits     = s->mv_bits;
1666         avctx->misc_bits   = s->misc_bits;
1667         avctx->i_tex_bits  = s->i_tex_bits;
1668         avctx->p_tex_bits  = s->p_tex_bits;
1669         avctx->i_count     = s->i_count;
1670         // FIXME f/b_count in avctx
1671         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1672         avctx->skip_count  = s->skip_count;
1673 FF_ENABLE_DEPRECATION_WARNINGS
1674 #endif
1675
1676         frame_end(s);
1677
1678         sd = av_packet_new_side_data(pkt, AV_PKT_DATA_QUALITY_FACTOR,
1679                                      sizeof(int));
1680         if (!sd)
1681             return AVERROR(ENOMEM);
1682         *(int *)sd = s->current_picture.f->quality;
1683
1684         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1685             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1686
1687         if (avctx->rc_buffer_size) {
1688             RateControlContext *rcc = &s->rc_context;
1689             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1690
1691             if (put_bits_count(&s->pb) > max_size &&
1692                 s->lambda < s->lmax) {
1693                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1694                                        (s->qscale + 1) / s->qscale);
1695                 if (s->adaptive_quant) {
1696                     int i;
1697                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1698                         s->lambda_table[i] =
1699                             FFMAX(s->lambda_table[i] + 1,
1700                                   s->lambda_table[i] * (s->qscale + 1) /
1701                                   s->qscale);
1702                 }
1703                 s->mb_skipped = 0;        // done in frame_start()
1704                 // done in encode_picture() so we must undo it
1705                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1706                     if (s->flipflop_rounding          ||
1707                         s->codec_id == AV_CODEC_ID_H263P ||
1708                         s->codec_id == AV_CODEC_ID_MPEG4)
1709                         s->no_rounding ^= 1;
1710                 }
1711                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1712                     s->time_base       = s->last_time_base;
1713                     s->last_non_b_time = s->time - s->pp_time;
1714                 }
1715                 for (i = 0; i < context_count; i++) {
1716                     PutBitContext *pb = &s->thread_context[i]->pb;
1717                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1718                 }
1719                 goto vbv_retry;
1720             }
1721
1722             assert(s->avctx->rc_max_rate);
1723         }
1724
1725         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1726             ff_write_pass1_stats(s);
1727
1728         for (i = 0; i < 4; i++) {
1729             s->current_picture_ptr->encoding_error[i] = s->current_picture.encoding_error[i];
1730             avctx->error[i] += s->current_picture_ptr->encoding_error[i];
1731         }
1732
1733         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1734             assert(put_bits_count(&s->pb) == s->header_bits + s->mv_bits +
1735                                              s->misc_bits + s->i_tex_bits +
1736                                              s->p_tex_bits);
1737         flush_put_bits(&s->pb);
1738         s->frame_bits  = put_bits_count(&s->pb);
1739
1740         stuffing_count = ff_vbv_update(s, s->frame_bits);
1741         if (stuffing_count) {
1742             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1743                     stuffing_count + 50) {
1744                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1745                 return -1;
1746             }
1747
1748             switch (s->codec_id) {
1749             case AV_CODEC_ID_MPEG1VIDEO:
1750             case AV_CODEC_ID_MPEG2VIDEO:
1751                 while (stuffing_count--) {
1752                     put_bits(&s->pb, 8, 0);
1753                 }
1754             break;
1755             case AV_CODEC_ID_MPEG4:
1756                 put_bits(&s->pb, 16, 0);
1757                 put_bits(&s->pb, 16, 0x1C3);
1758                 stuffing_count -= 4;
1759                 while (stuffing_count--) {
1760                     put_bits(&s->pb, 8, 0xFF);
1761                 }
1762             break;
1763             default:
1764                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1765             }
1766             flush_put_bits(&s->pb);
1767             s->frame_bits  = put_bits_count(&s->pb);
1768         }
1769
1770         /* update mpeg1/2 vbv_delay for CBR */
1771         if (s->avctx->rc_max_rate                          &&
1772             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1773             s->out_format == FMT_MPEG1                     &&
1774             90000LL * (avctx->rc_buffer_size - 1) <=
1775                 s->avctx->rc_max_rate * 0xFFFFLL) {
1776             AVCPBProperties *props;
1777             size_t props_size;
1778
1779             int vbv_delay, min_delay;
1780             double inbits  = s->avctx->rc_max_rate *
1781                              av_q2d(s->avctx->time_base);
1782             int    minbits = s->frame_bits - 8 *
1783                              (s->vbv_delay_ptr - s->pb.buf - 1);
1784             double bits    = s->rc_context.buffer_index + minbits - inbits;
1785
1786             if (bits < 0)
1787                 av_log(s->avctx, AV_LOG_ERROR,
1788                        "Internal error, negative bits\n");
1789
1790             assert(s->repeat_first_field == 0);
1791
1792             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1793             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1794                         s->avctx->rc_max_rate;
1795
1796             vbv_delay = FFMAX(vbv_delay, min_delay);
1797
1798             assert(vbv_delay < 0xFFFF);
1799
1800             s->vbv_delay_ptr[0] &= 0xF8;
1801             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1802             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1803             s->vbv_delay_ptr[2] &= 0x07;
1804             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1805
1806             props = av_cpb_properties_alloc(&props_size);
1807             if (!props)
1808                 return AVERROR(ENOMEM);
1809             props->vbv_delay = vbv_delay * 300;
1810
1811 #if FF_API_VBV_DELAY
1812 FF_DISABLE_DEPRECATION_WARNINGS
1813             avctx->vbv_delay     = vbv_delay * 300;
1814 FF_ENABLE_DEPRECATION_WARNINGS
1815 #endif
1816         }
1817         s->total_bits     += s->frame_bits;
1818 #if FF_API_STAT_BITS
1819 FF_DISABLE_DEPRECATION_WARNINGS
1820         avctx->frame_bits  = s->frame_bits;
1821 FF_ENABLE_DEPRECATION_WARNINGS
1822 #endif
1823
1824
1825         pkt->pts = s->current_picture.f->pts;
1826         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1827             if (!s->current_picture.f->coded_picture_number)
1828                 pkt->dts = pkt->pts - s->dts_delta;
1829             else
1830                 pkt->dts = s->reordered_pts;
1831             s->reordered_pts = pkt->pts;
1832         } else
1833             pkt->dts = pkt->pts;
1834         if (s->current_picture.f->key_frame)
1835             pkt->flags |= AV_PKT_FLAG_KEY;
1836         if (s->mb_info)
1837             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1838     } else {
1839         s->frame_bits = 0;
1840     }
1841     assert((s->frame_bits & 7) == 0);
1842
1843     pkt->size = s->frame_bits / 8;
1844     *got_packet = !!pkt->size;
1845     return 0;
1846 }
1847
1848 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1849                                                 int n, int threshold)
1850 {
1851     static const char tab[64] = {
1852         3, 2, 2, 1, 1, 1, 1, 1,
1853         1, 1, 1, 1, 1, 1, 1, 1,
1854         1, 1, 1, 1, 1, 1, 1, 1,
1855         0, 0, 0, 0, 0, 0, 0, 0,
1856         0, 0, 0, 0, 0, 0, 0, 0,
1857         0, 0, 0, 0, 0, 0, 0, 0,
1858         0, 0, 0, 0, 0, 0, 0, 0,
1859         0, 0, 0, 0, 0, 0, 0, 0
1860     };
1861     int score = 0;
1862     int run = 0;
1863     int i;
1864     int16_t *block = s->block[n];
1865     const int last_index = s->block_last_index[n];
1866     int skip_dc;
1867
1868     if (threshold < 0) {
1869         skip_dc = 0;
1870         threshold = -threshold;
1871     } else
1872         skip_dc = 1;
1873
1874     /* Are all we could set to zero already zero? */
1875     if (last_index <= skip_dc - 1)
1876         return;
1877
1878     for (i = 0; i <= last_index; i++) {
1879         const int j = s->intra_scantable.permutated[i];
1880         const int level = FFABS(block[j]);
1881         if (level == 1) {
1882             if (skip_dc && i == 0)
1883                 continue;
1884             score += tab[run];
1885             run = 0;
1886         } else if (level > 1) {
1887             return;
1888         } else {
1889             run++;
1890         }
1891     }
1892     if (score >= threshold)
1893         return;
1894     for (i = skip_dc; i <= last_index; i++) {
1895         const int j = s->intra_scantable.permutated[i];
1896         block[j] = 0;
1897     }
1898     if (block[0])
1899         s->block_last_index[n] = 0;
1900     else
1901         s->block_last_index[n] = -1;
1902 }
1903
1904 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1905                                int last_index)
1906 {
1907     int i;
1908     const int maxlevel = s->max_qcoeff;
1909     const int minlevel = s->min_qcoeff;
1910     int overflow = 0;
1911
1912     if (s->mb_intra) {
1913         i = 1; // skip clipping of intra dc
1914     } else
1915         i = 0;
1916
1917     for (; i <= last_index; i++) {
1918         const int j = s->intra_scantable.permutated[i];
1919         int level = block[j];
1920
1921         if (level > maxlevel) {
1922             level = maxlevel;
1923             overflow++;
1924         } else if (level < minlevel) {
1925             level = minlevel;
1926             overflow++;
1927         }
1928
1929         block[j] = level;
1930     }
1931
1932     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1933         av_log(s->avctx, AV_LOG_INFO,
1934                "warning, clipping %d dct coefficients to %d..%d\n",
1935                overflow, minlevel, maxlevel);
1936 }
1937
/**
 * Fill an 8x8 weight table from the local pixel statistics of an 8x8 block.
 * Every entry is 36 * ff_sqrt(count * sum_of_squares - sum * sum) / count,
 * taken over the pixel and its neighbours (up to 3x3) that lie inside the
 * block.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two pixel rows
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int idx;

    // FIXME optimize
    for (idx = 0; idx < 64; idx++) {
        const int col     = idx & 7;
        const int row     = idx >> 3;
        const int x_begin = FFMAX(col - 1, 0);
        const int x_end   = FFMIN(8, col + 2);
        const int y_begin = FFMAX(row - 1, 0);
        const int y_end   = FFMIN(8, row + 2);
        const int count   = (x_end - x_begin) * (y_end - y_begin);
        int sum = 0;
        int sqr = 0;
        int nx, ny;

        for (ny = y_begin; ny < y_end; ny++) {
            const uint8_t *line = ptr + ny * stride;

            for (nx = x_begin; nx < x_end; nx++) {
                const int v = line[nx];

                sum += v;
                sqr += v * v;
            }
        }
        weight[idx] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
    }
}
1961
/**
 * Encode one macroblock: fetch the source pixels (intra) or the difference
 * against the motion compensated prediction (inter), run s->dct_quantize()
 * on every block that is not skipped and pass the result to the codec
 * specific ff_*_encode_mb() bitstream writer.
 *
 * @param s               encoder context; s->mb_x / s->mb_y select the
 *                        macroblock
 * @param motion_x        forwarded unchanged to the ff_*_encode_mb() call
 * @param motion_y        forwarded unchanged to the ff_*_encode_mb() call
 * @param mb_block_height number of chroma lines per macroblock row, used to
 *                        address the chroma planes (encode_mb() passes 8 or 16)
 * @param mb_block_count  number of 8x8 blocks in the macroblock
 *                        (encode_mb() passes 6 or 8)
 */
1962 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1963                                                 int motion_x, int motion_y,
1964                                                 int mb_block_height,
1965                                                 int mb_block_count)
1966 {
1967     int16_t weight[8][64];
1968     int16_t orig[8][64];
1969     const int mb_x = s->mb_x;
1970     const int mb_y = s->mb_y;
1971     int i;
1972     int skip_dct[8];
1973     int dct_offset = s->linesize * 8; // default for progressive frames
1974     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1975     ptrdiff_t wrap_y, wrap_c;
1976
         /* every block starts out with the context wide skipdct setting */
1977     for (i = 0; i < mb_block_count; i++)
1978         skip_dct[i] = s->skipdct;
1979
         /* per macroblock quantizer: lambda comes from lambda_table and the
          * change against the previous qscale is stored in s->dquant */
1980     if (s->adaptive_quant) {
1981         const int last_qp = s->qscale;
1982         const int mb_xy = mb_x + mb_y * s->mb_stride;
1983
1984         s->lambda = s->lambda_table[mb_xy];
1985         update_qscale(s);
1986
1987         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1988             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1989             s->dquant = s->qscale - last_qp;
1990
1991             if (s->out_format == FMT_H263) {
                     /* the quantizer step is limited to -2..2 here */
1992                 s->dquant = av_clip(s->dquant, -2, 2);
1993
1994                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1995                     if (!s->mb_intra) {
                             /* B pictures: no odd steps and none at all for
                              * direct mode macroblocks */
1996                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1997                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1998                                 s->dquant = 0;
1999                         }
                             /* no quantizer change with 8x8 motion vectors */
2000                         if (s->mv_type == MV_TYPE_8X8)
2001                             s->dquant = 0;
2002                     }
2003                 }
2004             }
2005         }
2006         ff_set_qscale(s, last_qp + s->dquant);
2007     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2008         ff_set_qscale(s, s->qscale + s->dquant);
2009
         /* locate this macroblock in the planes of the input picture */
2010     wrap_y = s->linesize;
2011     wrap_c = s->uvlinesize;
2012     ptr_y  = s->new_picture.f->data[0] +
2013              (mb_y * 16 * wrap_y)              + mb_x * 16;
2014     ptr_cb = s->new_picture.f->data[1] +
2015              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
2016     ptr_cr = s->new_picture.f->data[2] +
2017              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
2018
         /* the macroblock extends past the right or bottom picture edge:
          * have emulated_edge_mc() build it in edge_emu_buffer and read the
          * source pixels from there instead */
2019     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
2020         uint8_t *ebuf = s->sc.edge_emu_buffer + 32;
2021         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2022                                  wrap_y, wrap_y,
2023                                  16, 16, mb_x * 16, mb_y * 16,
2024                                  s->width, s->height);
2025         ptr_y = ebuf;
2026         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
2027                                  wrap_c, wrap_c,
2028                                  8, mb_block_height, mb_x * 8, mb_y * 8,
2029                                  s->width >> 1, s->height >> 1);
2030         ptr_cb = ebuf + 18 * wrap_y;
2031         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
2032                                  wrap_c, wrap_c,
2033                                  8, mb_block_height, mb_x * 8, mb_y * 8,
2034                                  s->width >> 1, s->height >> 1);
2035         ptr_cr = ebuf + 18 * wrap_y + 8;
2036     }
2037
2038     if (s->mb_intra) {
             /* intra: the blocks are filled straight from the source pixels */
2039         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2040             int progressive_score, interlaced_score;
2041
                 /* score the luma with consecutive lines (progressive) and
                  * with every second line (interlaced); the -400 biases the
                  * decision towards progressive */
2042             s->interlaced_dct = 0;
2043             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2044                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2045                                                      NULL, wrap_y, 8) - 400;
2046
2047             if (progressive_score > 0) {
2048                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2049                                                         NULL, wrap_y * 2, 8) +
2050                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2051                                                         NULL, wrap_y * 2, 8);
2052                 if (progressive_score > interlaced_score) {
2053                     s->interlaced_dct = 1;
2054
                         /* interlaced: the lower blocks start one line down
                          * and all blocks step over every second line */
2055                     dct_offset = wrap_y;
2056                     wrap_y <<= 1;
2057                     if (s->chroma_format == CHROMA_422)
2058                         wrap_c <<= 1;
2059                 }
2060             }
2061         }
2062
2063         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2064         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2065         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2066         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2067
2068         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
                 /* gray only: chroma blocks 4 and 5 are not transformed */
2069             skip_dct[4] = 1;
2070             skip_dct[5] = 1;
2071         } else {
2072             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2073             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2074             if (!s->chroma_y_shift) { /* 422 */
2075                 s->pdsp.get_pixels(s->block[6],
2076                                    ptr_cb + (dct_offset >> 1), wrap_c);
2077                 s->pdsp.get_pixels(s->block[7],
2078                                    ptr_cr + (dct_offset >> 1), wrap_c);
2079             }
2080         }
2081     } else {
             /* inter: the blocks are filled by diff_pixels() from the source
              * and the contents of s->dest[] after ff_mpv_motion()
              * NOTE(review): presumably source minus prediction - confirm
              * against the pixblockdsp implementation */
2082         op_pixels_func (*op_pix)[4];
2083         qpel_mc_func (*op_qpix)[16];
2084         uint8_t *dest_y, *dest_cb, *dest_cr;
2085
2086         dest_y  = s->dest[0];
2087         dest_cb = s->dest[1];
2088         dest_cr = s->dest[2];
2089
             /* the no_rnd tables are only used for non-B pictures with
              * no_rounding set */
2090         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2091             op_pix  = s->hdsp.put_pixels_tab;
2092             op_qpix = s->qdsp.put_qpel_pixels_tab;
2093         } else {
2094             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2095             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2096         }
2097
2098         if (s->mv_dir & MV_DIR_FORWARD) {
2099             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2100                           s->last_picture.f->data,
2101                           op_pix, op_qpix);
                 /* a following backward prediction uses the avg tables */
2102             op_pix  = s->hdsp.avg_pixels_tab;
2103             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2104         }
2105         if (s->mv_dir & MV_DIR_BACKWARD) {
2106             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2107                           s->next_picture.f->data,
2108                           op_pix, op_qpix);
2109         }
2110
2111         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2112             int progressive_score, interlaced_score;
2113
                 /* same progressive/interlaced decision as in the intra
                  * path, but scored on source versus prediction */
2114             s->interlaced_dct = 0;
2115             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2116                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2117                                                      ptr_y + wrap_y * 8,
2118                                                      wrap_y, 8) - 400;
2119
                 /* extra bias towards progressive for the VSSE compare */
2120             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2121                 progressive_score -= 400;
2122
2123             if (progressive_score > 0) {
2124                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2125                                                         wrap_y * 2, 8) +
2126                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2127                                                         ptr_y + wrap_y,
2128                                                         wrap_y * 2, 8);
2129
2130                 if (progressive_score > interlaced_score) {
2131                     s->interlaced_dct = 1;
2132
2133                     dct_offset = wrap_y;
2134                     wrap_y <<= 1;
2135                     if (s->chroma_format == CHROMA_422)
2136                         wrap_c <<= 1;
2137                 }
2138             }
2139         }
2140
2141         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2142         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2143         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2144                             dest_y + dct_offset, wrap_y);
2145         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2146                             dest_y + dct_offset + 8, wrap_y);
2147
2148         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2149             skip_dct[4] = 1;
2150             skip_dct[5] = 1;
2151         } else {
2152             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2153             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2154             if (!s->chroma_y_shift) { /* 422 */
2155                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2156                                     dest_cb + (dct_offset >> 1), wrap_c);
2157                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2158                                     dest_cr + (dct_offset >> 1), wrap_c);
2159             }
2160         }
2161         /* pre quantization */
             /* when mc_mb_var of this macroblock is below 2 * qscale^2, skip
              * the transform of every block whose SAD between source and
              * prediction is below 20 * qscale */
2162         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2163                 2 * s->qscale * s->qscale) {
2164             // FIXME optimize
2165             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2166                 skip_dct[0] = 1;
2167             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2168                 skip_dct[1] = 1;
2169             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2170                                wrap_y, 8) < 20 * s->qscale)
2171                 skip_dct[2] = 1;
2172             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2173                                wrap_y, 8) < 20 * s->qscale)
2174                 skip_dct[3] = 1;
2175             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2176                 skip_dct[4] = 1;
2177             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2178                 skip_dct[5] = 1;
2179             if (!s->chroma_y_shift) { /* 422 */
2180                 if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2181                                    dest_cb + (dct_offset >> 1),
2182                                    wrap_c, 8) < 20 * s->qscale)
2183                     skip_dct[6] = 1;
2184                 if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2185                                    dest_cr + (dct_offset >> 1),
2186                                    wrap_c, 8) < 20 * s->qscale)
2187                     skip_dct[7] = 1;
2188             }
2189         }
2190     }
2191
         /* noise shaping: compute a weight table from the source pixels of
          * every block that is not skipped and keep a copy of the blocks as
          * they are before quantization for dct_quantize_refine() below */
2192     if (s->quantizer_noise_shaping) {
2193         if (!skip_dct[0])
2194             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2195         if (!skip_dct[1])
2196             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2197         if (!skip_dct[2])
2198             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2199         if (!skip_dct[3])
2200             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2201         if (!skip_dct[4])
2202             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2203         if (!skip_dct[5])
2204             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2205         if (!s->chroma_y_shift) { /* 422 */
2206             if (!skip_dct[6])
2207                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2208                                   wrap_c);
2209             if (!skip_dct[7])
2210                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2211                                   wrap_c);
2212         }
             /* NOTE(review): one copy for all blocks, this assumes that
              * s->block[0..mb_block_count-1] are contiguous - confirm */
2213         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2214     }
2215
2216     /* DCT & quantize */
2217     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2218     {
2219         for (i = 0; i < mb_block_count; i++) {
2220             if (!skip_dct[i]) {
2221                 int overflow;
2222                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2223                 // FIXME we could decide to change to quantizer instead of
2224                 // clipping
2225                 // JS: I don't think that would be a good idea it could lower
2226                 //     quality instead of improve it. Just INTRADC clipping
2227                 //     deserves changes in quantizer
2228                 if (overflow)
2229                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2230             } else
                     /* skipped blocks are marked as holding no coefficients */
2231                 s->block_last_index[i] = -1;
2232         }
2233         if (s->quantizer_noise_shaping) {
2234             for (i = 0; i < mb_block_count; i++) {
2235                 if (!skip_dct[i]) {
2236                     s->block_last_index[i] =
2237                         dct_quantize_refine(s, s->block[i], weight[i],
2238                                             orig[i], i, s->qscale);
2239                 }
2240             }
2241         }
2242
             /* single coefficient elimination, inter macroblocks only:
              * blocks 0-3 use the luma threshold, the rest the chroma one */
2243         if (s->luma_elim_threshold && !s->mb_intra)
2244             for (i = 0; i < 4; i++)
2245                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2246         if (s->chroma_elim_threshold && !s->mb_intra)
2247             for (i = 4; i < mb_block_count; i++)
2248                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2249
             /* with CBP_RD, blocks without coefficients get a fixed
              * coded_score of INT_MAX / 256 */
2250         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2251             for (i = 0; i < mb_block_count; i++) {
2252                 if (s->block_last_index[i] == -1)
2253                     s->coded_score[i] = INT_MAX / 256;
2254             }
2255         }
2256     }
2257
         /* gray intra macroblock: blocks 4 and 5 get a single DC coefficient
          * of 1024 divided by c_dc_scale, rounded to nearest */
2258     if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2259         s->block_last_index[4] =
2260         s->block_last_index[5] = 0;
2261         s->block[4][0] =
2262         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2263     }
2264
2265     // non c quantize code returns incorrect block_last_index FIXME
         /* so with alternate scan, search backwards from position 63 for the
          * last non-zero coefficient in scan order */
2266     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2267         for (i = 0; i < mb_block_count; i++) {
2268             int j;
2269             if (s->block_last_index[i] > 0) {
2270                 for (j = 63; j > 0; j--) {
2271                     if (s->block[i][s->intra_scantable.permutated[j]])
2272                         break;
2273                 }
2274                 s->block_last_index[i] = j;
2275             }
2276         }
2277     }
2278
2279     /* huffman encode */
         /* dispatch to the bitstream writer of the codec; each call is
          * guarded by its CONFIG_*_ENCODER constant */
2280     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2281     case AV_CODEC_ID_MPEG1VIDEO:
2282     case AV_CODEC_ID_MPEG2VIDEO:
2283         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2284             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2285         break;
2286     case AV_CODEC_ID_MPEG4:
2287         if (CONFIG_MPEG4_ENCODER)
2288             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2289         break;
2290     case AV_CODEC_ID_MSMPEG4V2:
2291     case AV_CODEC_ID_MSMPEG4V3:
2292     case AV_CODEC_ID_WMV1:
2293         if (CONFIG_MSMPEG4_ENCODER)
2294             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2295         break;
2296     case AV_CODEC_ID_WMV2:
2297         if (CONFIG_WMV2_ENCODER)
2298             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2299         break;
2300     case AV_CODEC_ID_H261:
2301         if (CONFIG_H261_ENCODER)
2302             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2303         break;
2304     case AV_CODEC_ID_H263:
2305     case AV_CODEC_ID_H263P:
2306     case AV_CODEC_ID_FLV1:
2307     case AV_CODEC_ID_RV10:
2308     case AV_CODEC_ID_RV20:
2309         if (CONFIG_H263_ENCODER)
2310             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2311         break;
2312     case AV_CODEC_ID_MJPEG:
2313         if (CONFIG_MJPEG_ENCODER)
2314             ff_mjpeg_encode_mb(s, s->block);
2315         break;
2316     default:
2317         assert(0);
2318     }
2319 }
2320
2321 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2322 {
2323     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2324     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2325 }
2326
2327 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2328     int i;
2329
2330     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2331
2332     /* mpeg1 */
2333     d->mb_skip_run= s->mb_skip_run;
2334     for(i=0; i<3; i++)
2335         d->last_dc[i] = s->last_dc[i];
2336
2337     /* statistics */
2338     d->mv_bits= s->mv_bits;
2339     d->i_tex_bits= s->i_tex_bits;
2340     d->p_tex_bits= s->p_tex_bits;
2341     d->i_count= s->i_count;
2342     d->f_count= s->f_count;
2343     d->b_count= s->b_count;
2344     d->skip_count= s->skip_count;
2345     d->misc_bits= s->misc_bits;
2346     d->last_bits= 0;
2347
2348     d->mb_skipped= 0;
2349     d->qscale= s->qscale;
2350     d->dquant= s->dquant;
2351
2352     d->esc3_level_length= s->esc3_level_length;
2353 }
2354
2355 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2356     int i;
2357
2358     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2359     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2360
2361     /* mpeg1 */
2362     d->mb_skip_run= s->mb_skip_run;
2363     for(i=0; i<3; i++)
2364         d->last_dc[i] = s->last_dc[i];
2365
2366     /* statistics */
2367     d->mv_bits= s->mv_bits;
2368     d->i_tex_bits= s->i_tex_bits;
2369     d->p_tex_bits= s->p_tex_bits;
2370     d->i_count= s->i_count;
2371     d->f_count= s->f_count;
2372     d->b_count= s->b_count;
2373     d->skip_count= s->skip_count;
2374     d->misc_bits= s->misc_bits;
2375
2376     d->mb_intra= s->mb_intra;
2377     d->mb_skipped= s->mb_skipped;
2378     d->mv_type= s->mv_type;
2379     d->mv_dir= s->mv_dir;
2380     d->pb= s->pb;
2381     if(s->data_partitioning){
2382         d->pb2= s->pb2;
2383         d->tex_pb= s->tex_pb;
2384     }
2385     d->block= s->block;
2386     for(i=0; i<8; i++)
2387         d->block_last_index[i]= s->block_last_index[i];
2388     d->interlaced_dct= s->interlaced_dct;
2389     d->qscale= s->qscale;
2390
2391     d->esc3_level_length= s->esc3_level_length;
2392 }
2393
2394 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2395                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2396                            int *dmin, int *next_block, int motion_x, int motion_y)
2397 {
2398     int score;
2399     uint8_t *dest_backup[3];
2400
2401     copy_context_before_encode(s, backup, type);
2402
2403     s->block= s->blocks[*next_block];
2404     s->pb= pb[*next_block];
2405     if(s->data_partitioning){
2406         s->pb2   = pb2   [*next_block];
2407         s->tex_pb= tex_pb[*next_block];
2408     }
2409
2410     if(*next_block){
2411         memcpy(dest_backup, s->dest, sizeof(s->dest));
2412         s->dest[0] = s->sc.rd_scratchpad;
2413         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2414         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2415         assert(s->linesize >= 32); //FIXME
2416     }
2417
2418     encode_mb(s, motion_x, motion_y);
2419
2420     score= put_bits_count(&s->pb);
2421     if(s->data_partitioning){
2422         score+= put_bits_count(&s->pb2);
2423         score+= put_bits_count(&s->tex_pb);
2424     }
2425
2426     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2427         ff_mpv_decode_mb(s, s->block);
2428
2429         score *= s->lambda2;
2430         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2431     }
2432
2433     if(*next_block){
2434         memcpy(s->dest, dest_backup, sizeof(s->dest));
2435     }
2436
2437     if(score<*dmin){
2438         *dmin= score;
2439         *next_block^=1;
2440
2441         copy_context_after_encode(best, s, type);
2442     }
2443 }
2444
2445 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2446     uint32_t *sq = ff_square_tab + 256;
2447     int acc=0;
2448     int x,y;
2449
2450     if(w==16 && h==16)
2451         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2452     else if(w==8 && h==8)
2453         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2454
2455     for(y=0; y<h; y++){
2456         for(x=0; x<w; x++){
2457             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2458         }
2459     }
2460
2461     assert(acc>=0);
2462
2463     return acc;
2464 }
2465
2466 static int sse_mb(MpegEncContext *s){
2467     int w= 16;
2468     int h= 16;
2469
2470     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2471     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2472
2473     if(w==16 && h==16)
2474       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2475         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2476                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2477                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2478       }else{
2479         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2480                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2481                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2482       }
2483     else
2484         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2485                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2486                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2487 }
2488
2489 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2490     MpegEncContext *s= *(void**)arg;
2491
2492
2493     s->me.pre_pass=1;
2494     s->me.dia_size= s->avctx->pre_dia_size;
2495     s->first_slice_line=1;
2496     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2497         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2498             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2499         }
2500         s->first_slice_line=0;
2501     }
2502
2503     s->me.pre_pass=0;
2504
2505     return 0;
2506 }
2507
2508 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2509     MpegEncContext *s= *(void**)arg;
2510
2511     s->me.dia_size= s->avctx->dia_size;
2512     s->first_slice_line=1;
2513     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2514         s->mb_x=0; //for block init below
2515         ff_init_block_index(s);
2516         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2517             s->block_index[0]+=2;
2518             s->block_index[1]+=2;
2519             s->block_index[2]+=2;
2520             s->block_index[3]+=2;
2521
2522             /* compute motion vector & mb_type and store in context */
2523             if(s->pict_type==AV_PICTURE_TYPE_B)
2524                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2525             else
2526                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2527         }
2528         s->first_slice_line=0;
2529     }
2530     return 0;
2531 }
2532
2533 static int mb_var_thread(AVCodecContext *c, void *arg){
2534     MpegEncContext *s= *(void**)arg;
2535     int mb_x, mb_y;
2536
2537     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2538         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2539             int xx = mb_x * 16;
2540             int yy = mb_y * 16;
2541             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2542             int varc;
2543             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2544
2545             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2546                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2547
2548             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2549             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2550             s->me.mb_var_sum_temp    += varc;
2551         }
2552     }
2553     return 0;
2554 }
2555
2556 static void write_slice_end(MpegEncContext *s){
2557     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2558         if(s->partitioned_frame){
2559             ff_mpeg4_merge_partitions(s);
2560         }
2561
2562         ff_mpeg4_stuffing(&s->pb);
2563     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2564         ff_mjpeg_encode_stuffing(&s->pb);
2565     }
2566
2567     avpriv_align_put_bits(&s->pb);
2568     flush_put_bits(&s->pb);
2569
2570     if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2571         s->misc_bits+= get_bits_diff(s);
2572 }
2573
2574 static void write_mb_info(MpegEncContext *s)
2575 {
2576     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2577     int offset = put_bits_count(&s->pb);
2578     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2579     int gobn = s->mb_y / s->gob_index;
2580     int pred_x, pred_y;
2581     if (CONFIG_H263_ENCODER)
2582         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2583     bytestream_put_le32(&ptr, offset);
2584     bytestream_put_byte(&ptr, s->qscale);
2585     bytestream_put_byte(&ptr, gobn);
2586     bytestream_put_le16(&ptr, mba);
2587     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2588     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2589     /* 4MV not implemented */
2590     bytestream_put_byte(&ptr, 0); /* hmv2 */
2591     bytestream_put_byte(&ptr, 0); /* vmv2 */
2592 }
2593
2594 static void update_mb_info(MpegEncContext *s, int startcode)
2595 {
2596     if (!s->mb_info)
2597         return;
2598     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2599         s->mb_info_size += 12;
2600         s->prev_mb_info = s->last_mb_info;
2601     }
2602     if (startcode) {
2603         s->prev_mb_info = put_bits_count(&s->pb)/8;
2604         /* This might have incremented mb_info_size above, and we return without
2605          * actually writing any info into that slot yet. But in that case,
2606          * this will be called again at the start of the after writing the
2607          * start code, actually writing the mb info. */
2608         return;
2609     }
2610
2611     s->last_mb_info = put_bits_count(&s->pb)/8;
2612     if (!s->mb_info_size)
2613         s->mb_info_size += 12;
2614     write_mb_info(s);
2615 }
2616
2617 static int encode_thread(AVCodecContext *c, void *arg){
2618     MpegEncContext *s= *(void**)arg;
2619     int mb_x, mb_y, pdif = 0;
2620     int chr_h= 16>>s->chroma_y_shift;
2621     int i, j;
2622     MpegEncContext best_s = { 0 }, backup_s;
2623     uint8_t bit_buf[2][MAX_MB_BYTES];
2624     uint8_t bit_buf2[2][MAX_MB_BYTES];
2625     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2626     PutBitContext pb[2], pb2[2], tex_pb[2];
2627
2628     for(i=0; i<2; i++){
2629         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2630         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2631         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2632     }
2633
2634     s->last_bits= put_bits_count(&s->pb);
2635     s->mv_bits=0;
2636     s->misc_bits=0;
2637     s->i_tex_bits=0;
2638     s->p_tex_bits=0;
2639     s->i_count=0;
2640     s->f_count=0;
2641     s->b_count=0;
2642     s->skip_count=0;
2643
2644     for(i=0; i<3; i++){
2645         /* init last dc values */
2646         /* note: quant matrix value (8) is implied here */
2647         s->last_dc[i] = 128 << s->intra_dc_precision;
2648
2649         s->current_picture.encoding_error[i] = 0;
2650     }
2651     s->mb_skip_run = 0;
2652     memset(s->last_mv, 0, sizeof(s->last_mv));
2653
2654     s->last_mv_dir = 0;
2655
2656     switch(s->codec_id){
2657     case AV_CODEC_ID_H263:
2658     case AV_CODEC_ID_H263P:
2659     case AV_CODEC_ID_FLV1:
2660         if (CONFIG_H263_ENCODER)
2661             s->gob_index = H263_GOB_HEIGHT(s->height);
2662         break;
2663     case AV_CODEC_ID_MPEG4:
2664         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2665             ff_mpeg4_init_partitions(s);
2666         break;
2667     }
2668
2669     s->resync_mb_x=0;
2670     s->resync_mb_y=0;
2671     s->first_slice_line = 1;
2672     s->ptr_lastgob = s->pb.buf;
2673     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2674         s->mb_x=0;
2675         s->mb_y= mb_y;
2676
2677         ff_set_qscale(s, s->qscale);
2678         ff_init_block_index(s);
2679
2680         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2681             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2682             int mb_type= s->mb_type[xy];
2683 //            int d;
2684             int dmin= INT_MAX;
2685             int dir;
2686
2687             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2688                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2689                 return -1;
2690             }
2691             if(s->data_partitioning){
2692                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2693                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2694                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2695                     return -1;
2696                 }
2697             }
2698
2699             s->mb_x = mb_x;
2700             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2701             ff_update_block_index(s);
2702
2703             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2704                 ff_h261_reorder_mb_index(s);
2705                 xy= s->mb_y*s->mb_stride + s->mb_x;
2706                 mb_type= s->mb_type[xy];
2707             }
2708
2709             /* write gob / video packet header  */
2710             if(s->rtp_mode){
2711                 int current_packet_size, is_gob_start;
2712
2713                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2714
2715                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2716
2717                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2718
2719                 switch(s->codec_id){
2720                 case AV_CODEC_ID_H263:
2721                 case AV_CODEC_ID_H263P:
2722                     if(!s->h263_slice_structured)
2723                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2724                     break;
2725                 case AV_CODEC_ID_MPEG2VIDEO:
2726                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2727                 case AV_CODEC_ID_MPEG1VIDEO:
2728                     if(s->mb_skip_run) is_gob_start=0;
2729                     break;
2730                 }
2731
2732                 if(is_gob_start){
2733                     if(s->start_mb_y != mb_y || mb_x!=0){
2734                         write_slice_end(s);
2735
2736                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2737                             ff_mpeg4_init_partitions(s);
2738                         }
2739                     }
2740
2741                     assert((put_bits_count(&s->pb)&7) == 0);
2742                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2743
2744                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2745                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2746                         int d = 100 / s->error_rate;
2747                         if(r % d == 0){
2748                             current_packet_size=0;
2749                             s->pb.buf_ptr= s->ptr_lastgob;
2750                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2751                         }
2752                     }
2753
2754 #if FF_API_RTP_CALLBACK
2755 FF_DISABLE_DEPRECATION_WARNINGS
2756                     if (s->avctx->rtp_callback){
2757                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2758                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2759                     }
2760 FF_ENABLE_DEPRECATION_WARNINGS
2761 #endif
2762                     update_mb_info(s, 1);
2763
2764                     switch(s->codec_id){
2765                     case AV_CODEC_ID_MPEG4:
2766                         if (CONFIG_MPEG4_ENCODER) {
2767                             ff_mpeg4_encode_video_packet_header(s);
2768                             ff_mpeg4_clean_buffers(s);
2769                         }
2770                     break;
2771                     case AV_CODEC_ID_MPEG1VIDEO:
2772                     case AV_CODEC_ID_MPEG2VIDEO:
2773                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2774                             ff_mpeg1_encode_slice_header(s);
2775                             ff_mpeg1_clean_buffers(s);
2776                         }
2777                     break;
2778                     case AV_CODEC_ID_H263:
2779                     case AV_CODEC_ID_H263P:
2780                         if (CONFIG_H263_ENCODER)
2781                             ff_h263_encode_gob_header(s, mb_y);
2782                     break;
2783                     }
2784
2785                     if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
2786                         int bits= put_bits_count(&s->pb);
2787                         s->misc_bits+= bits - s->last_bits;
2788                         s->last_bits= bits;
2789                     }
2790
2791                     s->ptr_lastgob += current_packet_size;
2792                     s->first_slice_line=1;
2793                     s->resync_mb_x=mb_x;
2794                     s->resync_mb_y=mb_y;
2795                 }
2796             }
2797
2798             if(  (s->resync_mb_x   == s->mb_x)
2799                && s->resync_mb_y+1 == s->mb_y){
2800                 s->first_slice_line=0;
2801             }
2802
2803             s->mb_skipped=0;
2804             s->dquant=0; //only for QP_RD
2805
2806             update_mb_info(s, 0);
2807
2808             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2809                 int next_block=0;
2810                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2811
2812                 copy_context_before_encode(&backup_s, s, -1);
2813                 backup_s.pb= s->pb;
2814                 best_s.data_partitioning= s->data_partitioning;
2815                 best_s.partitioned_frame= s->partitioned_frame;
2816                 if(s->data_partitioning){
2817                     backup_s.pb2= s->pb2;
2818                     backup_s.tex_pb= s->tex_pb;
2819                 }
2820
2821                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2822                     s->mv_dir = MV_DIR_FORWARD;
2823                     s->mv_type = MV_TYPE_16X16;
2824                     s->mb_intra= 0;
2825                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2826                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2827                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2828                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2829                 }
2830                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2831                     s->mv_dir = MV_DIR_FORWARD;
2832                     s->mv_type = MV_TYPE_FIELD;
2833                     s->mb_intra= 0;
2834                     for(i=0; i<2; i++){
2835                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2836                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2837                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2838                     }
2839                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2840                                  &dmin, &next_block, 0, 0);
2841                 }
2842                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2843                     s->mv_dir = MV_DIR_FORWARD;
2844                     s->mv_type = MV_TYPE_16X16;
2845                     s->mb_intra= 0;
2846                     s->mv[0][0][0] = 0;
2847                     s->mv[0][0][1] = 0;
2848                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2849                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2850                 }
2851                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2852                     s->mv_dir = MV_DIR_FORWARD;
2853                     s->mv_type = MV_TYPE_8X8;
2854                     s->mb_intra= 0;
2855                     for(i=0; i<4; i++){
2856                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2857                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2858                     }
2859                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2860                                  &dmin, &next_block, 0, 0);
2861                 }
2862                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2863                     s->mv_dir = MV_DIR_FORWARD;
2864                     s->mv_type = MV_TYPE_16X16;
2865                     s->mb_intra= 0;
2866                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2867                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2868                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2869                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2870                 }
2871                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2872                     s->mv_dir = MV_DIR_BACKWARD;
2873                     s->mv_type = MV_TYPE_16X16;
2874                     s->mb_intra= 0;
2875                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2876                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2877                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2878                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2879                 }
2880                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2881                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2882                     s->mv_type = MV_TYPE_16X16;
2883                     s->mb_intra= 0;
2884                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2885                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2886                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2887                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2888                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2889                                  &dmin, &next_block, 0, 0);
2890                 }
2891                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2892                     s->mv_dir = MV_DIR_FORWARD;
2893                     s->mv_type = MV_TYPE_FIELD;
2894                     s->mb_intra= 0;
2895                     for(i=0; i<2; i++){
2896                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2897                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2898                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2899                     }
2900                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2901                                  &dmin, &next_block, 0, 0);
2902                 }
2903                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2904                     s->mv_dir = MV_DIR_BACKWARD;
2905                     s->mv_type = MV_TYPE_FIELD;
2906                     s->mb_intra= 0;
2907                     for(i=0; i<2; i++){
2908                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2909                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2910                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2911                     }
2912                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2913                                  &dmin, &next_block, 0, 0);
2914                 }
2915                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2916                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2917                     s->mv_type = MV_TYPE_FIELD;
2918                     s->mb_intra= 0;
2919                     for(dir=0; dir<2; dir++){
2920                         for(i=0; i<2; i++){
2921                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2922                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2923                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2924                         }
2925                     }
2926                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2927                                  &dmin, &next_block, 0, 0);
2928                 }
2929                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2930                     s->mv_dir = 0;
2931                     s->mv_type = MV_TYPE_16X16;
2932                     s->mb_intra= 1;
2933                     s->mv[0][0][0] = 0;
2934                     s->mv[0][0][1] = 0;
2935                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2936                                  &dmin, &next_block, 0, 0);
2937                     if(s->h263_pred || s->h263_aic){
2938                         if(best_s.mb_intra)
2939                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2940                         else
2941                             ff_clean_intra_table_entries(s); //old mode?
2942                     }
2943                 }
2944
2945                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2946                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2947                         const int last_qp= backup_s.qscale;
2948                         int qpi, qp, dc[6];
2949                         int16_t ac[6][16];
2950                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2951                         static const int dquant_tab[4]={-1,1,-2,2};
2952
2953                         assert(backup_s.dquant == 0);
2954
2955                         //FIXME intra
2956                         s->mv_dir= best_s.mv_dir;
2957                         s->mv_type = MV_TYPE_16X16;
2958                         s->mb_intra= best_s.mb_intra;
2959                         s->mv[0][0][0] = best_s.mv[0][0][0];
2960                         s->mv[0][0][1] = best_s.mv[0][0][1];
2961                         s->mv[1][0][0] = best_s.mv[1][0][0];
2962                         s->mv[1][0][1] = best_s.mv[1][0][1];
2963
2964                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2965                         for(; qpi<4; qpi++){
2966                             int dquant= dquant_tab[qpi];
2967                             qp= last_qp + dquant;
2968                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2969                                 continue;
2970                             backup_s.dquant= dquant;
2971                             if(s->mb_intra && s->dc_val[0]){
2972                                 for(i=0; i<6; i++){
2973                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2974                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2975                                 }
2976                             }
2977
2978                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2979                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2980                             if(best_s.qscale != qp){
2981                                 if(s->mb_intra && s->dc_val[0]){
2982                                     for(i=0; i<6; i++){
2983                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2984                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2985                                     }
2986                                 }
2987                             }
2988                         }
2989                     }
2990                 }
2991                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2992                     int mx= s->b_direct_mv_table[xy][0];
2993                     int my= s->b_direct_mv_table[xy][1];
2994
2995                     backup_s.dquant = 0;
2996                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2997                     s->mb_intra= 0;
2998                     ff_mpeg4_set_direct_mv(s, mx, my);
2999                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3000                                  &dmin, &next_block, mx, my);
3001                 }
3002                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3003                     backup_s.dquant = 0;
3004                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3005                     s->mb_intra= 0;
3006                     ff_mpeg4_set_direct_mv(s, 0, 0);
3007                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3008                                  &dmin, &next_block, 0, 0);
3009                 }
3010                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3011                     int coded=0;
3012                     for(i=0; i<6; i++)
3013                         coded |= s->block_last_index[i];
3014                     if(coded){
3015                         int mx,my;
3016                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3017                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3018                             mx=my=0; //FIXME find the one we actually used
3019                             ff_mpeg4_set_direct_mv(s, mx, my);
3020                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3021                             mx= s->mv[1][0][0];
3022                             my= s->mv[1][0][1];
3023                         }else{
3024                             mx= s->mv[0][0][0];
3025                             my= s->mv[0][0][1];
3026                         }
3027
3028                         s->mv_dir= best_s.mv_dir;
3029                         s->mv_type = best_s.mv_type;
3030                         s->mb_intra= 0;
3031 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3032                         s->mv[0][0][1] = best_s.mv[0][0][1];
3033                         s->mv[1][0][0] = best_s.mv[1][0][0];
3034                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3035                         backup_s.dquant= 0;
3036                         s->skipdct=1;
3037                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3038                                         &dmin, &next_block, mx, my);
3039                         s->skipdct=0;
3040                     }
3041                 }
3042
3043                 s->current_picture.qscale_table[xy] = best_s.qscale;
3044
3045                 copy_context_after_encode(s, &best_s, -1);
3046
3047                 pb_bits_count= put_bits_count(&s->pb);
3048                 flush_put_bits(&s->pb);
3049                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3050                 s->pb= backup_s.pb;
3051
3052                 if(s->data_partitioning){
3053                     pb2_bits_count= put_bits_count(&s->pb2);
3054                     flush_put_bits(&s->pb2);
3055                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3056                     s->pb2= backup_s.pb2;
3057
3058                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3059                     flush_put_bits(&s->tex_pb);
3060                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3061                     s->tex_pb= backup_s.tex_pb;
3062                 }
3063                 s->last_bits= put_bits_count(&s->pb);
3064
3065                 if (CONFIG_H263_ENCODER &&
3066                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3067                     ff_h263_update_motion_val(s);
3068
3069                 if(next_block==0){ //FIXME 16 vs linesize16
3070                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3071                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3072                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3073                 }
3074
3075                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3076                     ff_mpv_decode_mb(s, s->block);
3077             } else {
3078                 int motion_x = 0, motion_y = 0;
3079                 s->mv_type=MV_TYPE_16X16;
3080                 // only one MB-Type possible
3081
3082                 switch(mb_type){
3083                 case CANDIDATE_MB_TYPE_INTRA:
3084                     s->mv_dir = 0;
3085                     s->mb_intra= 1;
3086                     motion_x= s->mv[0][0][0] = 0;
3087                     motion_y= s->mv[0][0][1] = 0;
3088                     break;
3089                 case CANDIDATE_MB_TYPE_INTER:
3090                     s->mv_dir = MV_DIR_FORWARD;
3091                     s->mb_intra= 0;
3092                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3093                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3094                     break;
3095                 case CANDIDATE_MB_TYPE_INTER_I:
3096                     s->mv_dir = MV_DIR_FORWARD;
3097                     s->mv_type = MV_TYPE_FIELD;
3098                     s->mb_intra= 0;
3099                     for(i=0; i<2; i++){
3100                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3101                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3102                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3103                     }
3104                     break;
3105                 case CANDIDATE_MB_TYPE_INTER4V:
3106                     s->mv_dir = MV_DIR_FORWARD;
3107                     s->mv_type = MV_TYPE_8X8;
3108                     s->mb_intra= 0;
3109                     for(i=0; i<4; i++){
3110                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3111                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3112                     }
3113                     break;
3114                 case CANDIDATE_MB_TYPE_DIRECT:
3115                     if (CONFIG_MPEG4_ENCODER) {
3116                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3117                         s->mb_intra= 0;
3118                         motion_x=s->b_direct_mv_table[xy][0];
3119                         motion_y=s->b_direct_mv_table[xy][1];
3120                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3121                     }
3122                     break;
3123                 case CANDIDATE_MB_TYPE_DIRECT0:
3124                     if (CONFIG_MPEG4_ENCODER) {
3125                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3126                         s->mb_intra= 0;
3127                         ff_mpeg4_set_direct_mv(s, 0, 0);
3128                     }
3129                     break;
3130                 case CANDIDATE_MB_TYPE_BIDIR:
3131                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3132                     s->mb_intra= 0;
3133                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3134                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3135                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3136                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3137                     break;
3138                 case CANDIDATE_MB_TYPE_BACKWARD:
3139                     s->mv_dir = MV_DIR_BACKWARD;
3140                     s->mb_intra= 0;
3141                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3142                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3143                     break;
3144                 case CANDIDATE_MB_TYPE_FORWARD:
3145                     s->mv_dir = MV_DIR_FORWARD;
3146                     s->mb_intra= 0;
3147                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3148                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3149                     break;
3150                 case CANDIDATE_MB_TYPE_FORWARD_I:
3151                     s->mv_dir = MV_DIR_FORWARD;
3152                     s->mv_type = MV_TYPE_FIELD;
3153                     s->mb_intra= 0;
3154                     for(i=0; i<2; i++){
3155                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3156                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3157                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3158                     }
3159                     break;
3160                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3161                     s->mv_dir = MV_DIR_BACKWARD;
3162                     s->mv_type = MV_TYPE_FIELD;
3163                     s->mb_intra= 0;
3164                     for(i=0; i<2; i++){
3165                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3166                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3167                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3168                     }
3169                     break;
3170                 case CANDIDATE_MB_TYPE_BIDIR_I:
3171                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3172                     s->mv_type = MV_TYPE_FIELD;
3173                     s->mb_intra= 0;
3174                     for(dir=0; dir<2; dir++){
3175                         for(i=0; i<2; i++){
3176                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3177                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3178                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3179                         }
3180                     }
3181                     break;
3182                 default:
3183                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3184                 }
3185
3186                 encode_mb(s, motion_x, motion_y);
3187
3188                 // RAL: Update last macroblock type
3189                 s->last_mv_dir = s->mv_dir;
3190
3191                 if (CONFIG_H263_ENCODER &&
3192                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3193                     ff_h263_update_motion_val(s);
3194
3195                 ff_mpv_decode_mb(s, s->block);
3196             }
3197
3198             /* clean the MV table in IPS frames for direct mode in B frames */
3199             if(s->mb_intra /* && I,P,S_TYPE */){
3200                 s->p_mv_table[xy][0]=0;
3201                 s->p_mv_table[xy][1]=0;
3202             }
3203
3204             if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3205                 int w= 16;
3206                 int h= 16;
3207
3208                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3209                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3210
3211                 s->current_picture.encoding_error[0] += sse(
3212                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3213                     s->dest[0], w, h, s->linesize);
3214                 s->current_picture.encoding_error[1] += sse(
3215                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3216                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3217                 s->current_picture.encoding_error[2] += sse(
3218                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3219                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3220             }
3221             if(s->loop_filter){
3222                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3223                     ff_h263_loop_filter(s);
3224             }
3225             ff_dlog(s->avctx, "MB %d %d bits\n",
3226                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3227         }
3228     }
3229
3230     //not beautiful here but we must write it before flushing so it has to be here
3231     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3232         ff_msmpeg4_encode_ext_header(s);
3233
3234     write_slice_end(s);
3235
3236 #if FF_API_RTP_CALLBACK
3237 FF_DISABLE_DEPRECATION_WARNINGS
3238     /* Send the last GOB if RTP */
3239     if (s->avctx->rtp_callback) {
3240         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3241         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3242         /* Call the RTP callback to send the last GOB */
3243         emms_c();
3244         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3245     }
3246 FF_ENABLE_DEPRECATION_WARNINGS
3247 #endif
3248
3249     return 0;
3250 }
3251
3252 #define MERGE(field) dst->field += src->field; src->field=0
3253 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3254     MERGE(me.scene_change_score);
3255     MERGE(me.mc_mb_var_sum_temp);
3256     MERGE(me.mb_var_sum_temp);
3257 }
3258
3259 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3260     int i;
3261
3262     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3263     MERGE(dct_count[1]);
3264     MERGE(mv_bits);
3265     MERGE(i_tex_bits);
3266     MERGE(p_tex_bits);
3267     MERGE(i_count);
3268     MERGE(f_count);
3269     MERGE(b_count);
3270     MERGE(skip_count);
3271     MERGE(misc_bits);
3272     MERGE(er.error_count);
3273     MERGE(padding_bug_score);
3274     MERGE(current_picture.encoding_error[0]);
3275     MERGE(current_picture.encoding_error[1]);
3276     MERGE(current_picture.encoding_error[2]);
3277
3278     if(dst->avctx->noise_reduction){
3279         for(i=0; i<64; i++){
3280             MERGE(dct_error_sum[0][i]);
3281             MERGE(dct_error_sum[1][i]);
3282         }
3283     }
3284
3285     assert(put_bits_count(&src->pb) % 8 ==0);
3286     assert(put_bits_count(&dst->pb) % 8 ==0);
3287     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3288     flush_put_bits(&dst->pb);
3289 }
3290
3291 static int estimate_qp(MpegEncContext *s, int dry_run){
3292     if (s->next_lambda){
3293         s->current_picture_ptr->f->quality =
3294         s->current_picture.f->quality = s->next_lambda;
3295         if(!dry_run) s->next_lambda= 0;
3296     } else if (!s->fixed_qscale) {
3297         s->current_picture_ptr->f->quality =
3298         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3299         if (s->current_picture.f->quality < 0)
3300             return -1;
3301     }
3302
3303     if(s->adaptive_quant){
3304         switch(s->codec_id){
3305         case AV_CODEC_ID_MPEG4:
3306             if (CONFIG_MPEG4_ENCODER)
3307                 ff_clean_mpeg4_qscales(s);
3308             break;
3309         case AV_CODEC_ID_H263:
3310         case AV_CODEC_ID_H263P:
3311         case AV_CODEC_ID_FLV1:
3312             if (CONFIG_H263_ENCODER)
3313                 ff_clean_h263_qscales(s);
3314             break;
3315         default:
3316             ff_init_qscale_tab(s);
3317         }
3318
3319         s->lambda= s->lambda_table[0];
3320         //FIXME broken
3321     }else
3322         s->lambda = s->current_picture.f->quality;
3323     update_qscale(s);
3324     return 0;
3325 }
3326
3327 /* must be called before writing the header */
3328 static void set_frame_distances(MpegEncContext * s){
3329     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3330     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3331
3332     if(s->pict_type==AV_PICTURE_TYPE_B){
3333         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3334         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3335     }else{
3336         s->pp_time= s->time - s->last_non_b_time;
3337         s->last_non_b_time= s->time;
3338         assert(s->picture_number==0 || s->pp_time > 0);
3339     }
3340 }
3341
3342 static int encode_picture(MpegEncContext *s, int picture_number)
3343 {
3344     int i, ret;
3345     int bits;
3346     int context_count = s->slice_context_count;
3347
3348     s->picture_number = picture_number;
3349
3350     /* Reset the average MB variance */
3351     s->me.mb_var_sum_temp    =
3352     s->me.mc_mb_var_sum_temp = 0;
3353
3354     /* we need to initialize some time vars before we can encode b-frames */
3355     // RAL: Condition added for MPEG1VIDEO
3356     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3357         set_frame_distances(s);
3358     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3359         ff_set_mpeg4_time(s);
3360
3361     s->me.scene_change_score=0;
3362
3363 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3364
3365     if(s->pict_type==AV_PICTURE_TYPE_I){
3366         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3367         else                        s->no_rounding=0;
3368     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3369         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3370             s->no_rounding ^= 1;
3371     }
3372
3373     if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3374         if (estimate_qp(s,1) < 0)
3375             return -1;
3376         ff_get_2pass_fcode(s);
3377     } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3378         if(s->pict_type==AV_PICTURE_TYPE_B)
3379             s->lambda= s->last_lambda_for[s->pict_type];
3380         else
3381             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3382         update_qscale(s);
3383     }
3384
3385     s->mb_intra=0; //for the rate distortion & bit compare functions
3386     for(i=1; i<context_count; i++){
3387         ret = ff_update_duplicate_context(s->thread_context[i], s);
3388         if (ret < 0)
3389             return ret;
3390     }
3391
3392     if(ff_init_me(s)<0)
3393         return -1;
3394
3395     /* Estimate motion for every MB */
3396     if(s->pict_type != AV_PICTURE_TYPE_I){
3397         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3398         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3399         if (s->pict_type != AV_PICTURE_TYPE_B) {
3400             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3401                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3402             }
3403         }
3404
3405         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3406     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3407         /* I-Frame */
3408         for(i=0; i<s->mb_stride*s->mb_height; i++)
3409             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3410
3411         if(!s->fixed_qscale){
3412             /* finding spatial complexity for I-frame rate control */
3413             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3414         }
3415     }
3416     for(i=1; i<context_count; i++){
3417         merge_context_after_me(s, s->thread_context[i]);
3418     }
3419     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3420     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3421     emms_c();
3422
3423     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3424         s->pict_type= AV_PICTURE_TYPE_I;
3425         for(i=0; i<s->mb_stride*s->mb_height; i++)
3426             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3427         ff_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3428                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3429     }
3430
3431     if(!s->umvplus){
3432         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3433             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3434
3435             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3436                 int a,b;
3437                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3438                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3439                 s->f_code= FFMAX3(s->f_code, a, b);
3440             }
3441
3442             ff_fix_long_p_mvs(s);
3443             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3444             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3445                 int j;
3446                 for(i=0; i<2; i++){
3447                     for(j=0; j<2; j++)
3448                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3449                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3450                 }
3451             }
3452         }
3453
3454         if(s->pict_type==AV_PICTURE_TYPE_B){
3455             int a, b;
3456
3457             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3458             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3459             s->f_code = FFMAX(a, b);
3460
3461             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3462             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3463             s->b_code = FFMAX(a, b);
3464
3465             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3466             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3467             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3468             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3469             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3470                 int dir, j;
3471                 for(dir=0; dir<2; dir++){
3472                     for(i=0; i<2; i++){
3473                         for(j=0; j<2; j++){
3474                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3475                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3476                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3477                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3478                         }
3479                     }
3480                 }
3481             }
3482         }
3483     }
3484
3485     if (estimate_qp(s, 0) < 0)
3486         return -1;
3487
3488     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3489         s->pict_type == AV_PICTURE_TYPE_I &&
3490         !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3491         s->qscale= 3; //reduce clipping problems
3492
3493     if (s->out_format == FMT_MJPEG) {
3494         /* for mjpeg, we do include qscale in the matrix */
3495         for(i=1;i<64;i++){
3496             int j = s->idsp.idct_permutation[i];
3497
3498             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3499         }
3500         s->y_dc_scale_table=
3501         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3502         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3503         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3504                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3505         s->qscale= 8;
3506     }
3507
3508     //FIXME var duplication
3509     s->current_picture_ptr->f->key_frame =
3510     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3511     s->current_picture_ptr->f->pict_type =
3512     s->current_picture.f->pict_type = s->pict_type;
3513
3514     if (s->current_picture.f->key_frame)
3515         s->picture_in_gop_number=0;
3516
3517     s->last_bits= put_bits_count(&s->pb);
3518     switch(s->out_format) {
3519     case FMT_MJPEG:
3520         if (CONFIG_MJPEG_ENCODER)
3521             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3522                                            s->intra_matrix);
3523         break;
3524     case FMT_H261:
3525         if (CONFIG_H261_ENCODER)
3526             ff_h261_encode_picture_header(s, picture_number);
3527         break;
3528     case FMT_H263:
3529         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3530             ff_wmv2_encode_picture_header(s, picture_number);
3531         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3532             ff_msmpeg4_encode_picture_header(s, picture_number);
3533         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3534             ff_mpeg4_encode_picture_header(s, picture_number);
3535         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3536             ret = ff_rv10_encode_picture_header(s, picture_number);
3537             if (ret < 0)
3538                 return ret;
3539         }
3540         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3541             ff_rv20_encode_picture_header(s, picture_number);
3542         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3543             ff_flv_encode_picture_header(s, picture_number);
3544         else if (CONFIG_H263_ENCODER)
3545             ff_h263_encode_picture_header(s, picture_number);
3546         break;
3547     case FMT_MPEG1:
3548         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3549             ff_mpeg1_encode_picture_header(s, picture_number);
3550         break;
3551     default:
3552         assert(0);
3553     }
3554     bits= put_bits_count(&s->pb);
3555     s->header_bits= bits - s->last_bits;
3556
3557     for(i=1; i<context_count; i++){
3558         update_duplicate_context_after_me(s->thread_context[i], s);
3559     }
3560     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3561     for(i=1; i<context_count; i++){
3562         merge_context_after_encode(s, s->thread_context[i]);
3563     }
3564     emms_c();
3565     return 0;
3566 }
3567
3568 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3569     const int intra= s->mb_intra;
3570     int i;
3571
3572     s->dct_count[intra]++;
3573
3574     for(i=0; i<64; i++){
3575         int level= block[i];
3576
3577         if(level){
3578             if(level>0){
3579                 s->dct_error_sum[intra][i] += level;
3580                 level -= s->dct_offset[intra][i];
3581                 if(level<0) level=0;
3582             }else{
3583                 s->dct_error_sum[intra][i] -= level;
3584                 level += s->dct_offset[intra][i];
3585                 if(level>0) level=0;
3586             }
3587             block[i]= level;
3588         }
3589     }
3590 }
3591
3592 static int dct_quantize_trellis_c(MpegEncContext *s,
3593                                   int16_t *block, int n,
3594                                   int qscale, int *overflow){
3595     const int *qmat;
3596     const uint8_t *scantable= s->intra_scantable.scantable;
3597     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3598     int max=0;
3599     unsigned int threshold1, threshold2;
3600     int bias=0;
3601     int run_tab[65];
3602     int level_tab[65];
3603     int score_tab[65];
3604     int survivor[65];
3605     int survivor_count;
3606     int last_run=0;
3607     int last_level=0;
3608     int last_score= 0;
3609     int last_i;
3610     int coeff[2][64];
3611     int coeff_count[64];
3612     int qmul, qadd, start_i, last_non_zero, i, dc;
3613     const int esc_length= s->ac_esc_length;
3614     uint8_t * length;
3615     uint8_t * last_length;
3616     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3617
3618     s->fdsp.fdct(block);
3619
3620     if(s->dct_error_sum)
3621         s->denoise_dct(s, block);
3622     qmul= qscale*16;
3623     qadd= ((qscale-1)|1)*8;
3624
3625     if (s->mb_intra) {
3626         int q;
3627         if (!s->h263_aic) {
3628             if (n < 4)
3629                 q = s->y_dc_scale;
3630             else
3631                 q = s->c_dc_scale;
3632             q = q << 3;
3633         } else{
3634             /* For AIC we skip quant/dequant of INTRADC */
3635             q = 1 << 3;
3636             qadd=0;
3637         }
3638
3639         /* note: block[0] is assumed to be positive */
3640         block[0] = (block[0] + (q >> 1)) / q;
3641         start_i = 1;
3642         last_non_zero = 0;
3643         qmat = s->q_intra_matrix[qscale];
3644         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3645             bias= 1<<(QMAT_SHIFT-1);
3646         length     = s->intra_ac_vlc_length;
3647         last_length= s->intra_ac_vlc_last_length;
3648     } else {
3649         start_i = 0;
3650         last_non_zero = -1;
3651         qmat = s->q_inter_matrix[qscale];
3652         length     = s->inter_ac_vlc_length;
3653         last_length= s->inter_ac_vlc_last_length;
3654     }
3655     last_i= start_i;
3656
3657     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3658     threshold2= (threshold1<<1);
3659
3660     for(i=63; i>=start_i; i--) {
3661         const int j = scantable[i];
3662         int level = block[j] * qmat[j];
3663
3664         if(((unsigned)(level+threshold1))>threshold2){
3665             last_non_zero = i;
3666             break;
3667         }
3668     }
3669
3670     for(i=start_i; i<=last_non_zero; i++) {
3671         const int j = scantable[i];
3672         int level = block[j] * qmat[j];
3673
3674 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3675 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3676         if(((unsigned)(level+threshold1))>threshold2){
3677             if(level>0){
3678                 level= (bias + level)>>QMAT_SHIFT;
3679                 coeff[0][i]= level;
3680                 coeff[1][i]= level-1;
3681 //                coeff[2][k]= level-2;
3682             }else{
3683                 level= (bias - level)>>QMAT_SHIFT;
3684                 coeff[0][i]= -level;
3685                 coeff[1][i]= -level+1;
3686 //                coeff[2][k]= -level+2;
3687             }
3688             coeff_count[i]= FFMIN(level, 2);
3689             assert(coeff_count[i]);
3690             max |=level;
3691         }else{
3692             coeff[0][i]= (level>>31)|1;
3693             coeff_count[i]= 1;
3694         }
3695     }
3696
3697     *overflow= s->max_qcoeff < max; //overflow might have happened
3698
3699     if(last_non_zero < start_i){
3700         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3701         return last_non_zero;
3702     }
3703
3704     score_tab[start_i]= 0;
3705     survivor[0]= start_i;
3706     survivor_count= 1;
3707
3708     for(i=start_i; i<=last_non_zero; i++){
3709         int level_index, j, zero_distortion;
3710         int dct_coeff= FFABS(block[ scantable[i] ]);
3711         int best_score=256*256*256*120;
3712
3713         if (s->fdsp.fdct == ff_fdct_ifast)
3714             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3715         zero_distortion= dct_coeff*dct_coeff;
3716
3717         for(level_index=0; level_index < coeff_count[i]; level_index++){
3718             int distortion;
3719             int level= coeff[level_index][i];
3720             const int alevel= FFABS(level);
3721             int unquant_coeff;
3722
3723             assert(level);
3724
3725             if(s->out_format == FMT_H263){
3726                 unquant_coeff= alevel*qmul + qadd;
3727             }else{ //MPEG1
3728                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3729                 if(s->mb_intra){
3730                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3731                         unquant_coeff =   (unquant_coeff - 1) | 1;
3732                 }else{
3733                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3734                         unquant_coeff =   (unquant_coeff - 1) | 1;
3735                 }
3736                 unquant_coeff<<= 3;
3737             }
3738
3739             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3740             level+=64;
3741             if((level&(~127)) == 0){
3742                 for(j=survivor_count-1; j>=0; j--){
3743                     int run= i - survivor[j];
3744                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3745                     score += score_tab[i-run];
3746
3747                     if(score < best_score){
3748                         best_score= score;
3749                         run_tab[i+1]= run;
3750                         level_tab[i+1]= level-64;
3751                     }
3752                 }
3753
3754                 if(s->out_format == FMT_H263){
3755                     for(j=survivor_count-1; j>=0; j--){
3756                         int run= i - survivor[j];
3757                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3758                         score += score_tab[i-run];
3759                         if(score < last_score){
3760                             last_score= score;
3761                             last_run= run;
3762                             last_level= level-64;
3763                             last_i= i+1;
3764                         }
3765                     }
3766                 }
3767             }else{
3768                 distortion += esc_length*lambda;
3769                 for(j=survivor_count-1; j>=0; j--){
3770                     int run= i - survivor[j];
3771                     int score= distortion + score_tab[i-run];
3772
3773                     if(score < best_score){
3774                         best_score= score;
3775                         run_tab[i+1]= run;
3776                         level_tab[i+1]= level-64;
3777                     }
3778                 }
3779
3780                 if(s->out_format == FMT_H263){
3781                   for(j=survivor_count-1; j>=0; j--){
3782                         int run= i - survivor[j];
3783                         int score= distortion + score_tab[i-run];
3784                         if(score < last_score){
3785                             last_score= score;
3786                             last_run= run;
3787                             last_level= level-64;
3788                             last_i= i+1;
3789                         }
3790                     }
3791                 }
3792             }
3793         }
3794
3795         score_tab[i+1]= best_score;
3796
3797         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3798         if(last_non_zero <= 27){
3799             for(; survivor_count; survivor_count--){
3800                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3801                     break;
3802             }
3803         }else{
3804             for(; survivor_count; survivor_count--){
3805                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3806                     break;
3807             }
3808         }
3809
3810         survivor[ survivor_count++ ]= i+1;
3811     }
3812
3813     if(s->out_format != FMT_H263){
3814         last_score= 256*256*256*120;
3815         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3816             int score= score_tab[i];
3817             if(i) score += lambda*2; //FIXME exacter?
3818
3819             if(score < last_score){
3820                 last_score= score;
3821                 last_i= i;
3822                 last_level= level_tab[i];
3823                 last_run= run_tab[i];
3824             }
3825         }
3826     }
3827
3828     s->coded_score[n] = last_score;
3829
3830     dc= FFABS(block[0]);
3831     last_non_zero= last_i - 1;
3832     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3833
3834     if(last_non_zero < start_i)
3835         return last_non_zero;
3836
3837     if(last_non_zero == 0 && start_i == 0){
3838         int best_level= 0;
3839         int best_score= dc * dc;
3840
3841         for(i=0; i<coeff_count[0]; i++){
3842             int level= coeff[i][0];
3843             int alevel= FFABS(level);
3844             int unquant_coeff, score, distortion;
3845
3846             if(s->out_format == FMT_H263){
3847                     unquant_coeff= (alevel*qmul + qadd)>>3;
3848             }else{ //MPEG1
3849                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3850                     unquant_coeff =   (unquant_coeff - 1) | 1;
3851             }
3852             unquant_coeff = (unquant_coeff + 4) >> 3;
3853             unquant_coeff<<= 3 + 3;
3854
3855             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3856             level+=64;
3857             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3858             else                    score= distortion + esc_length*lambda;
3859
3860             if(score < best_score){
3861                 best_score= score;
3862                 best_level= level - 64;
3863             }
3864         }
3865         block[0]= best_level;
3866         s->coded_score[n] = best_score - dc*dc;
3867         if(best_level == 0) return -1;
3868         else                return last_non_zero;
3869     }
3870
3871     i= last_i;
3872     assert(last_level);
3873
3874     block[ perm_scantable[last_non_zero] ]= last_level;
3875     i -= last_run + 1;
3876
3877     for(; i>start_i; i -= run_tab[i] + 1){
3878         block[ perm_scantable[i-1] ]= level_tab[i];
3879     }
3880
3881     return last_non_zero;
3882 }
3883
3884 //#define REFINE_STATS 1
3885 static int16_t basis[64][64];
3886
3887 static void build_basis(uint8_t *perm){
3888     int i, j, x, y;
3889     emms_c();
3890     for(i=0; i<8; i++){
3891         for(j=0; j<8; j++){
3892             for(y=0; y<8; y++){
3893                 for(x=0; x<8; x++){
3894                     double s= 0.25*(1<<BASIS_SHIFT);
3895                     int index= 8*i + j;
3896                     int perm_index= perm[index];
3897                     if(i==0) s*= sqrt(0.5);
3898                     if(j==0) s*= sqrt(0.5);
3899                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3900                 }
3901             }
3902         }
3903     }
3904 }
3905
3906 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3907                         int16_t *block, int16_t *weight, int16_t *orig,
3908                         int n, int qscale){
3909     int16_t rem[64];
3910     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3911     const uint8_t *scantable= s->intra_scantable.scantable;
3912     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3913 //    unsigned int threshold1, threshold2;
3914 //    int bias=0;
3915     int run_tab[65];
3916     int prev_run=0;
3917     int prev_level=0;
3918     int qmul, qadd, start_i, last_non_zero, i, dc;
3919     uint8_t * length;
3920     uint8_t * last_length;
3921     int lambda;
3922     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3923 #ifdef REFINE_STATS
3924 static int count=0;
3925 static int after_last=0;
3926 static int to_zero=0;
3927 static int from_zero=0;
3928 static int raise=0;
3929 static int lower=0;
3930 static int messed_sign=0;
3931 #endif
3932
3933     if(basis[0][0] == 0)
3934         build_basis(s->idsp.idct_permutation);
3935
3936     qmul= qscale*2;
3937     qadd= (qscale-1)|1;
3938     if (s->mb_intra) {
3939         if (!s->h263_aic) {
3940             if (n < 4)
3941                 q = s->y_dc_scale;
3942             else
3943                 q = s->c_dc_scale;
3944         } else{
3945             /* For AIC we skip quant/dequant of INTRADC */
3946             q = 1;
3947             qadd=0;
3948         }
3949         q <<= RECON_SHIFT-3;
3950         /* note: block[0] is assumed to be positive */
3951         dc= block[0]*q;
3952 //        block[0] = (block[0] + (q >> 1)) / q;
3953         start_i = 1;
3954 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3955 //            bias= 1<<(QMAT_SHIFT-1);
3956         length     = s->intra_ac_vlc_length;
3957         last_length= s->intra_ac_vlc_last_length;
3958     } else {
3959         dc= 0;
3960         start_i = 0;
3961         length     = s->inter_ac_vlc_length;
3962         last_length= s->inter_ac_vlc_last_length;
3963     }
3964     last_non_zero = s->block_last_index[n];
3965
3966 #ifdef REFINE_STATS
3967 {START_TIMER
3968 #endif
3969     dc += (1<<(RECON_SHIFT-1));
3970     for(i=0; i<64; i++){
3971         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3972     }
3973 #ifdef REFINE_STATS
3974 STOP_TIMER("memset rem[]")}
3975 #endif
3976     sum=0;
3977     for(i=0; i<64; i++){
3978         int one= 36;
3979         int qns=4;
3980         int w;
3981
3982         w= FFABS(weight[i]) + qns*one;
3983         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3984
3985         weight[i] = w;
3986 //        w=weight[i] = (63*qns + (w/2)) / w;
3987
3988         assert(w>0);
3989         assert(w<(1<<6));
3990         sum += w*w;
3991     }
3992     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3993 #ifdef REFINE_STATS
3994 {START_TIMER
3995 #endif
3996     run=0;
3997     rle_index=0;
3998     for(i=start_i; i<=last_non_zero; i++){
3999         int j= perm_scantable[i];
4000         const int level= block[j];
4001         int coeff;
4002
4003         if(level){
4004             if(level<0) coeff= qmul*level - qadd;
4005             else        coeff= qmul*level + qadd;
4006             run_tab[rle_index++]=run;
4007             run=0;
4008
4009             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4010         }else{
4011             run++;
4012         }
4013     }
4014 #ifdef REFINE_STATS
4015 if(last_non_zero>0){
4016 STOP_TIMER("init rem[]")
4017 }
4018 }
4019
4020 {START_TIMER
4021 #endif
4022     for(;;){
4023         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4024         int best_coeff=0;
4025         int best_change=0;
4026         int run2, best_unquant_change=0, analyze_gradient;
4027 #ifdef REFINE_STATS
4028 {START_TIMER
4029 #endif
4030         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4031
4032         if(analyze_gradient){
4033 #ifdef REFINE_STATS
4034 {START_TIMER
4035 #endif
4036             for(i=0; i<64; i++){
4037                 int w= weight[i];
4038
4039                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4040             }
4041 #ifdef REFINE_STATS
4042 STOP_TIMER("rem*w*w")}
4043 {START_TIMER
4044 #endif
4045             s->fdsp.fdct(d1);
4046 #ifdef REFINE_STATS
4047 STOP_TIMER("dct")}
4048 #endif
4049         }
4050
4051         if(start_i){
4052             const int level= block[0];
4053             int change, old_coeff;
4054
4055             assert(s->mb_intra);
4056
4057             old_coeff= q*level;
4058
4059             for(change=-1; change<=1; change+=2){
4060                 int new_level= level + change;
4061                 int score, new_coeff;
4062
4063                 new_coeff= q*new_level;
4064                 if(new_coeff >= 2048 || new_coeff < 0)
4065                     continue;
4066
4067                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4068                                                   new_coeff - old_coeff);
4069                 if(score<best_score){
4070                     best_score= score;
4071                     best_coeff= 0;
4072                     best_change= change;
4073                     best_unquant_change= new_coeff - old_coeff;
4074                 }
4075             }
4076         }
4077
4078         run=0;
4079         rle_index=0;
4080         run2= run_tab[rle_index++];
4081         prev_level=0;
4082         prev_run=0;
4083
4084         for(i=start_i; i<64; i++){
4085             int j= perm_scantable[i];
4086             const int level= block[j];
4087             int change, old_coeff;
4088
4089             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4090                 break;
4091
4092             if(level){
4093                 if(level<0) old_coeff= qmul*level - qadd;
4094                 else        old_coeff= qmul*level + qadd;
4095                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4096             }else{
4097                 old_coeff=0;
4098                 run2--;
4099                 assert(run2>=0 || i >= last_non_zero );
4100             }
4101
4102             for(change=-1; change<=1; change+=2){
4103                 int new_level= level + change;
4104                 int score, new_coeff, unquant_change;
4105
4106                 score=0;
4107                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4108                    continue;
4109
4110                 if(new_level){
4111                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4112                     else            new_coeff= qmul*new_level + qadd;
4113                     if(new_coeff >= 2048 || new_coeff <= -2048)
4114                         continue;
4115                     //FIXME check for overflow
4116
4117                     if(level){
4118                         if(level < 63 && level > -63){
4119                             if(i < last_non_zero)
4120                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4121                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4122                             else
4123                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4124                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4125                         }
4126                     }else{
4127                         assert(FFABS(new_level)==1);
4128
4129                         if(analyze_gradient){
4130                             int g= d1[ scantable[i] ];
4131                             if(g && (g^new_level) >= 0)
4132                                 continue;
4133                         }
4134
4135                         if(i < last_non_zero){
4136                             int next_i= i + run2 + 1;
4137                             int next_level= block[ perm_scantable[next_i] ] + 64;
4138
4139                             if(next_level&(~127))
4140                                 next_level= 0;
4141
4142                             if(next_i < last_non_zero)
4143                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4144                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4145                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4146                             else
4147                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4148                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4149                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4150                         }else{
4151                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4152                             if(prev_level){
4153                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4154                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4155                             }
4156                         }
4157                     }
4158                 }else{
4159                     new_coeff=0;
4160                     assert(FFABS(level)==1);
4161
4162                     if(i < last_non_zero){
4163                         int next_i= i + run2 + 1;
4164                         int next_level= block[ perm_scantable[next_i] ] + 64;
4165
4166                         if(next_level&(~127))
4167                             next_level= 0;
4168
4169                         if(next_i < last_non_zero)
4170                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4171                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4172                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4173                         else
4174                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4175                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4176                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4177                     }else{
4178                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4179                         if(prev_level){
4180                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4181                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4182                         }
4183                     }
4184                 }
4185
4186                 score *= lambda;
4187
4188                 unquant_change= new_coeff - old_coeff;
4189                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4190
4191                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4192                                                    unquant_change);
4193                 if(score<best_score){
4194                     best_score= score;
4195                     best_coeff= i;
4196                     best_change= change;
4197                     best_unquant_change= unquant_change;
4198                 }
4199             }
4200             if(level){
4201                 prev_level= level + 64;
4202                 if(prev_level&(~127))
4203                     prev_level= 0;
4204                 prev_run= run;
4205                 run=0;
4206             }else{
4207                 run++;
4208             }
4209         }
4210 #ifdef REFINE_STATS
4211 STOP_TIMER("iterative step")}
4212 #endif
4213
4214         if(best_change){
4215             int j= perm_scantable[ best_coeff ];
4216
4217             block[j] += best_change;
4218
4219             if(best_coeff > last_non_zero){
4220                 last_non_zero= best_coeff;
4221                 assert(block[j]);
4222 #ifdef REFINE_STATS
4223 after_last++;
4224 #endif
4225             }else{
4226 #ifdef REFINE_STATS
4227 if(block[j]){
4228     if(block[j] - best_change){
4229         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4230             raise++;
4231         }else{
4232             lower++;
4233         }
4234     }else{
4235         from_zero++;
4236     }
4237 }else{
4238     to_zero++;
4239 }
4240 #endif
4241                 for(; last_non_zero>=start_i; last_non_zero--){
4242                     if(block[perm_scantable[last_non_zero]])
4243                         break;
4244                 }
4245             }
4246 #ifdef REFINE_STATS
4247 count++;
4248 if(256*256*256*64 % count == 0){
4249     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4250 }
4251 #endif
4252             run=0;
4253             rle_index=0;
4254             for(i=start_i; i<=last_non_zero; i++){
4255                 int j= perm_scantable[i];
4256                 const int level= block[j];
4257
4258                  if(level){
4259                      run_tab[rle_index++]=run;
4260                      run=0;
4261                  }else{
4262                      run++;
4263                  }
4264             }
4265
4266             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4267         }else{
4268             break;
4269         }
4270     }
4271 #ifdef REFINE_STATS
4272 if(last_non_zero>0){
4273 STOP_TIMER("iterative search")
4274 }
4275 }
4276 #endif
4277
4278     return last_non_zero;
4279 }
4280
/**
 * Permute an 8x8 block according to a permutation vector.
 *
 * Only the coefficients reached by the first last+1 scan positions are
 * moved; every other element of the block is left untouched.
 *
 * @param block       the block to permute in place
 * @param permutation the permutation vector: the coefficient at index j
 *                    ends up at index permutation[j]
 * @param scantable   the scan table in use; it only limits the work to the
 *                    coefficients that can be non-zero, the block is not
 *                    (inverse) permuted to scan order
 * @param last        scan position of the last non-zero coefficient;
 *                    nothing is done when it is <= 0
 */
static void block_permute(int16_t *block, uint8_t *permutation,
                          const uint8_t *scantable, int last)
{
    int16_t saved[64];
    int pos;

    if (last <= 0)
        return;

    /* FIXME: an early return for identity permutations
     * (permutation[1] == 1) is left disabled; it is ok but not clean and
     * might fail for some permutations. */

    /* pass 1: stash every scanned coefficient and clear its old slot */
    pos = 0;
    while (pos <= last) {
        const int idx = scantable[pos];

        saved[idx] = block[idx];
        block[idx] = 0;
        pos++;
    }

    /* pass 2: drop each stashed coefficient into its permuted slot */
    pos = 0;
    while (pos <= last) {
        const int idx = scantable[pos];

        block[permutation[idx]] = saved[idx];
        pos++;
    }
}
4316
4317 int ff_dct_quantize_c(MpegEncContext *s,
4318                         int16_t *block, int n,
4319                         int qscale, int *overflow)
4320 {
4321     int i, j, level, last_non_zero, q, start_i;
4322     const int *qmat;
4323     const uint8_t *scantable= s->intra_scantable.scantable;
4324     int bias;
4325     int max=0;
4326     unsigned int threshold1, threshold2;
4327
4328     s->fdsp.fdct(block);
4329
4330     if(s->dct_error_sum)
4331         s->denoise_dct(s, block);
4332
4333     if (s->mb_intra) {
4334         if (!s->h263_aic) {
4335             if (n < 4)
4336                 q = s->y_dc_scale;
4337             else
4338                 q = s->c_dc_scale;
4339             q = q << 3;
4340         } else
4341             /* For AIC we skip quant/dequant of INTRADC */
4342             q = 1 << 3;
4343
4344         /* note: block[0] is assumed to be positive */
4345         block[0] = (block[0] + (q >> 1)) / q;
4346         start_i = 1;
4347         last_non_zero = 0;
4348         qmat = s->q_intra_matrix[qscale];
4349         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4350     } else {
4351         start_i = 0;
4352         last_non_zero = -1;
4353         qmat = s->q_inter_matrix[qscale];
4354         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4355     }
4356     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4357     threshold2= (threshold1<<1);
4358     for(i=63;i>=start_i;i--) {
4359         j = scantable[i];
4360         level = block[j] * qmat[j];
4361
4362         if(((unsigned)(level+threshold1))>threshold2){
4363             last_non_zero = i;
4364             break;
4365         }else{
4366             block[j]=0;
4367         }
4368     }
4369     for(i=start_i; i<=last_non_zero; i++) {
4370         j = scantable[i];
4371         level = block[j] * qmat[j];
4372
4373 //        if(   bias+level >= (1<<QMAT_SHIFT)
4374 //           || bias-level >= (1<<QMAT_SHIFT)){
4375         if(((unsigned)(level+threshold1))>threshold2){
4376             if(level>0){
4377                 level= (bias + level)>>QMAT_SHIFT;
4378                 block[j]= level;
4379             }else{
4380                 level= (bias - level)>>QMAT_SHIFT;
4381                 block[j]= -level;
4382             }
4383             max |=level;
4384         }else{
4385             block[j]=0;
4386         }
4387     }
4388     *overflow= s->max_qcoeff < max; //overflow might have happened
4389
4390     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4391     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4392         block_permute(block, s->idsp.idct_permutation,
4393                       scantable, last_non_zero);
4394
4395     return last_non_zero;
4396 }
4397
4398 #define OFFSET(x) offsetof(MpegEncContext, x)
4399 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4400 static const AVOption h263_options[] = {
4401     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4402     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4403     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4404     FF_MPV_COMMON_OPTS
4405     { NULL },
4406 };
4407
4408 static const AVClass h263_class = {
4409     .class_name = "H.263 encoder",
4410     .item_name  = av_default_item_name,
4411     .option     = h263_options,
4412     .version    = LIBAVUTIL_VERSION_INT,
4413 };
4414
4415 AVCodec ff_h263_encoder = {
4416     .name           = "h263",
4417     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4418     .type           = AVMEDIA_TYPE_VIDEO,
4419     .id             = AV_CODEC_ID_H263,
4420     .priv_data_size = sizeof(MpegEncContext),
4421     .init           = ff_mpv_encode_init,
4422     .encode2        = ff_mpv_encode_picture,
4423     .close          = ff_mpv_encode_end,
4424     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4425     .priv_class     = &h263_class,
4426 };
4427
4428 static const AVOption h263p_options[] = {
4429     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4430     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4431     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4432     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4433     FF_MPV_COMMON_OPTS
4434     { NULL },
4435 };
4436 static const AVClass h263p_class = {
4437     .class_name = "H.263p encoder",
4438     .item_name  = av_default_item_name,
4439     .option     = h263p_options,
4440     .version    = LIBAVUTIL_VERSION_INT,
4441 };
4442
4443 AVCodec ff_h263p_encoder = {
4444     .name           = "h263p",
4445     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4446     .type           = AVMEDIA_TYPE_VIDEO,
4447     .id             = AV_CODEC_ID_H263P,
4448     .priv_data_size = sizeof(MpegEncContext),
4449     .init           = ff_mpv_encode_init,
4450     .encode2        = ff_mpv_encode_picture,
4451     .close          = ff_mpv_encode_end,
4452     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
4453     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4454     .priv_class     = &h263p_class,
4455 };
4456
4457 static const AVClass msmpeg4v2_class = {
4458     .class_name = "msmpeg4v2 encoder",
4459     .item_name  = av_default_item_name,
4460     .option     = ff_mpv_generic_options,
4461     .version    = LIBAVUTIL_VERSION_INT,
4462 };
4463
4464 AVCodec ff_msmpeg4v2_encoder = {
4465     .name           = "msmpeg4v2",
4466     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4467     .type           = AVMEDIA_TYPE_VIDEO,
4468     .id             = AV_CODEC_ID_MSMPEG4V2,
4469     .priv_data_size = sizeof(MpegEncContext),
4470     .init           = ff_mpv_encode_init,
4471     .encode2        = ff_mpv_encode_picture,
4472     .close          = ff_mpv_encode_end,
4473     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4474     .priv_class     = &msmpeg4v2_class,
4475 };
4476
4477 static const AVClass msmpeg4v3_class = {
4478     .class_name = "msmpeg4v3 encoder",
4479     .item_name  = av_default_item_name,
4480     .option     = ff_mpv_generic_options,
4481     .version    = LIBAVUTIL_VERSION_INT,
4482 };
4483
4484 AVCodec ff_msmpeg4v3_encoder = {
4485     .name           = "msmpeg4",
4486     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4487     .type           = AVMEDIA_TYPE_VIDEO,
4488     .id             = AV_CODEC_ID_MSMPEG4V3,
4489     .priv_data_size = sizeof(MpegEncContext),
4490     .init           = ff_mpv_encode_init,
4491     .encode2        = ff_mpv_encode_picture,
4492     .close          = ff_mpv_encode_end,
4493     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4494     .priv_class     = &msmpeg4v3_class,
4495 };
4496
4497 static const AVClass wmv1_class = {
4498     .class_name = "wmv1 encoder",
4499     .item_name  = av_default_item_name,
4500     .option     = ff_mpv_generic_options,
4501     .version    = LIBAVUTIL_VERSION_INT,
4502 };
4503
4504 AVCodec ff_wmv1_encoder = {
4505     .name           = "wmv1",
4506     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4507     .type           = AVMEDIA_TYPE_VIDEO,
4508     .id             = AV_CODEC_ID_WMV1,
4509     .priv_data_size = sizeof(MpegEncContext),
4510     .init           = ff_mpv_encode_init,
4511     .encode2        = ff_mpv_encode_picture,
4512     .close          = ff_mpv_encode_end,
4513     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4514     .priv_class     = &wmv1_class,
4515 };