git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of Libav.
   9  *
  10  * Libav is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * Libav is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with Libav; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "idctdsp.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "mpegvideodata.h"
  44 #include "h261.h"
  45 #include "h263.h"
  46 #include "h263data.h"
  47 #include "mjpegenc_common.h"
  48 #include "mathops.h"
  49 #include "mpegutils.h"
  50 #include "mjpegenc.h"
  51 #include "msmpeg4.h"
  52 #include "pixblockdsp.h"
  53 #include "qpeldsp.h"
  54 #include "faandct.h"
  55 #include "thread.h"
  56 #include "aandcttab.h"
  57 #include "flv.h"
  58 #include "mpeg4video.h"
  59 #include "internal.h"
  60 #include "bytestream.h"
  61 #include "wmv2.h"
  62 #include "rv10.h"
  63 #include <limits.h>
  64
/* Number of fractional bits used for the quantizer bias values in this file
 * (e.g. the default intra bias 3/8 is written as 3 << (QUANT_BIAS_SHIFT - 3)). */
#define QUANT_BIAS_SHIFT 8

/* Fixed-point precision, in bits, of the reciprocal tables filled in by
 * ff_convert_matrix(): QMAT_SHIFT_MMX for the 16-bit qmat16 table and
 * QMAT_SHIFT for the int qmat table. */
#define QMAT_SHIFT_MMX 16
#define QMAT_SHIFT 22

/* Prototypes for file-local helpers so that they can be referenced ahead of
 * their definitions (ff_mpv_encode_init(), for instance, installs
 * denoise_dct_c and dct_quantize_trellis_c as function pointers). */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* Default tables that mpv_encode_defaults() installs as s->me.mv_penalty and
 * s->fcode_tab. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
  78
/* Generic option table: the entries expanded from FF_MPV_COMMON_OPTS followed
 * by the { NULL } terminator entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
  83
  84 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
  85                        uint16_t (*qmat16)[2][64],
  86                        const uint16_t *quant_matrix,
  87                        int bias, int qmin, int qmax, int intra)
  88 {
  89     FDCTDSPContext *fdsp = &s->fdsp;
  90     int qscale;
  91     int shift = 0;
  92
  93     for (qscale = qmin; qscale <= qmax; qscale++) {
  94         int i;
  95         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
  96 #if CONFIG_FAANDCT
  97             fdsp->fdct == ff_faandct            ||
  98 #endif /* CONFIG_FAANDCT */
  99             fdsp->fdct == ff_jpeg_fdct_islow_10) {
 100             for (i = 0; i < 64; i++) {
 101                 const int j = s->idsp.idct_permutation[i];
 102                 int64_t den = (int64_t) qscale * quant_matrix[j];
 103                 /* 16 <= qscale * quant_matrix[i] <= 7905
 104                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 105                  *             19952 <=              x  <= 249205026
 106                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 107                  *           3444240 >= (1 << 36) / (x) >= 275 */
 108
 109                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 110             }
 111         } else if (fdsp->fdct == ff_fdct_ifast) {
 112             for (i = 0; i < 64; i++) {
 113                 const int j = s->idsp.idct_permutation[i];
 114                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
 115                 /* 16 <= qscale * quant_matrix[i] <= 7905
 116                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 117                  *             19952 <=              x  <= 249205026
 118                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 119                  *           3444240 >= (1 << 36) / (x) >= 275 */
 120
 121                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
 122             }
 123         } else {
 124             for (i = 0; i < 64; i++) {
 125                 const int j = s->idsp.idct_permutation[i];
 126                 int64_t den = (int64_t) qscale * quant_matrix[j];
 127                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 128                  * Assume x = qscale * quant_matrix[i]
 129                  * So             16 <=              x  <= 7905
 130                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 131                  * so          32768 >= (1 << 19) / (x) >= 67 */
 132                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 133                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 134                 //                    (qscale * quant_matrix[i]);
 135                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
 136
 137                 if (qmat16[qscale][0][i] == 0 ||
 138                     qmat16[qscale][0][i] == 128 * 256)
 139                     qmat16[qscale][0][i] = 128 * 256 - 1;
 140                 qmat16[qscale][1][i] =
 141                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 142                                 qmat16[qscale][0][i]);
 143             }
 144         }
 145
 146         for (i = intra; i < 64; i++) {
 147             int64_t max = 8191;
 148             if (fdsp->fdct == ff_fdct_ifast) {
 149                 max = (8191LL * ff_aanscales[i]) >> 14;
 150             }
 151             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 152                 shift++;
 153             }
 154         }
 155     }
 156     if (shift) {
 157         av_log(NULL, AV_LOG_INFO,
 158                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 159                QMAT_SHIFT - shift);
 160     }
 161 }
 162
 163 static inline void update_qscale(MpegEncContext *s)
 164 {
 165     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 166                 (FF_LAMBDA_SHIFT + 7);
 167     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 168
 169     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 170                  FF_LAMBDA_SHIFT;
 171 }
 172
 173 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 174 {
 175     int i;
 176
 177     if (matrix) {
 178         put_bits(pb, 1, 1);
 179         for (i = 0; i < 64; i++) {
 180             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 181         }
 182     } else
 183         put_bits(pb, 1, 0);
 184 }
 185
 186 /**
 187  * init s->current_picture.qscale_table from s->lambda_table
 188  */
 189 void ff_init_qscale_tab(MpegEncContext *s)
 190 {
 191     int8_t * const qscale_table = s->current_picture.qscale_table;
 192     int i;
 193
 194     for (i = 0; i < s->mb_num; i++) {
 195         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 196         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 197         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 198                                                   s->avctx->qmax);
 199     }
 200 }
 201
 202 static void update_duplicate_context_after_me(MpegEncContext *dst,
 203                                               MpegEncContext *src)
 204 {
 205 #define COPY(a) dst->a= src->a
 206     COPY(pict_type);
 207     COPY(current_picture);
 208     COPY(f_code);
 209     COPY(b_code);
 210     COPY(qscale);
 211     COPY(lambda);
 212     COPY(lambda2);
 213     COPY(picture_in_gop_number);
 214     COPY(gop_picture_number);
 215     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 216     COPY(progressive_frame);    // FIXME don't set in encode_header
 217     COPY(partitioned_frame);    // FIXME don't set in encode_header
 218 #undef COPY
 219 }
 220
 221 /**
 222  * Set the given MpegEncContext to defaults for encoding.
 223  * the changed fields will not depend upon the prior state of the MpegEncContext.
 224  */
 225 static void mpv_encode_defaults(MpegEncContext *s)
 226 {
 227     int i;
 228     ff_mpv_common_defaults(s);
 229
 230     for (i = -16; i < 16; i++) {
 231         default_fcode_tab[i + MAX_MV] = 1;
 232     }
 233     s->me.mv_penalty = default_mv_penalty;
 234     s->fcode_tab     = default_fcode_tab;
 235
 236     s->input_picture_number  = 0;
 237     s->picture_in_gop_number = 0;
 238 }
 239
 240 /* init video encoder */
 241 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
 242 {
 243     MpegEncContext *s = avctx->priv_data;
 244     AVCPBProperties *cpb_props;
 245     int i, ret, format_supported;
 246
 247     mpv_encode_defaults(s);
 248
 249     switch (avctx->codec_id) {
 250     case AV_CODEC_ID_MPEG2VIDEO:
 251         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 252             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 253             av_log(avctx, AV_LOG_ERROR,
 254                    "only YUV420 and YUV422 are supported\n");
 255             return -1;
 256         }
 257         break;
 258     case AV_CODEC_ID_MJPEG:
 259         format_supported = 0;
 260         /* JPEG color space */
 261         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
 262             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
 263             (avctx->color_range == AVCOL_RANGE_JPEG &&
 264              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 265               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
 266             format_supported = 1;
 267         /* MPEG color space */
 268         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
 269                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 270                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
 271             format_supported = 1;
 272
 273         if (!format_supported) {
 274             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 275             return -1;
 276         }
 277         break;
 278     default:
 279         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 280             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 281             return -1;
 282         }
 283     }
 284
 285     switch (avctx->pix_fmt) {
 286     case AV_PIX_FMT_YUVJ422P:
 287     case AV_PIX_FMT_YUV422P:
 288         s->chroma_format = CHROMA_422;
 289         break;
 290     case AV_PIX_FMT_YUVJ420P:
 291     case AV_PIX_FMT_YUV420P:
 292     default:
 293         s->chroma_format = CHROMA_420;
 294         break;
 295     }
 296
 297 #if FF_API_PRIVATE_OPT
 298 FF_DISABLE_DEPRECATION_WARNINGS
 299     if (avctx->rtp_payload_size)
 300         s->rtp_payload_size = avctx->rtp_payload_size;
 301 FF_ENABLE_DEPRECATION_WARNINGS
 302 #endif
 303
 304     s->bit_rate = avctx->bit_rate;
 305     s->width    = avctx->width;
 306     s->height   = avctx->height;
 307     if (avctx->gop_size > 600 &&
 308         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 309         av_log(avctx, AV_LOG_ERROR,
 310                "Warning keyframe interval too large! reducing it ...\n");
 311         avctx->gop_size = 600;
 312     }
 313     s->gop_size     = avctx->gop_size;
 314     s->avctx        = avctx;
 315     if (avctx->max_b_frames > MAX_B_FRAMES) {
 316         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 317                "is %d.\n", MAX_B_FRAMES);
 318     }
 319     s->max_b_frames = avctx->max_b_frames;
 320     s->codec_id     = avctx->codec->id;
 321     s->strict_std_compliance = avctx->strict_std_compliance;
 322     s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
 323     s->rtp_mode           = !!s->rtp_payload_size;
 324     s->intra_dc_precision = avctx->intra_dc_precision;
 325     s->user_specified_pts = AV_NOPTS_VALUE;
 326
 327     if (s->gop_size <= 1) {
 328         s->intra_only = 1;
 329         s->gop_size   = 12;
 330     } else {
 331         s->intra_only = 0;
 332     }
 333
 334 #if FF_API_MOTION_EST
 335 FF_DISABLE_DEPRECATION_WARNINGS
 336     s->me_method = avctx->me_method;
 337 FF_ENABLE_DEPRECATION_WARNINGS
 338 #endif
 339
 340     /* Fixed QSCALE */
 341     s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
 342
 343 #if FF_API_MPV_OPT
 344     FF_DISABLE_DEPRECATION_WARNINGS
 345     if (avctx->border_masking != 0.0)
 346         s->border_masking = avctx->border_masking;
 347     FF_ENABLE_DEPRECATION_WARNINGS
 348 #endif
 349
 350     s->adaptive_quant = (s->avctx->lumi_masking ||
 351                          s->avctx->dark_masking ||
 352                          s->avctx->temporal_cplx_masking ||
 353                          s->avctx->spatial_cplx_masking  ||
 354                          s->avctx->p_masking      ||
 355                          s->border_masking ||
 356                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 357                         !s->fixed_qscale;
 358
 359     s->loop_filter = !!(s->avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
 360
 361     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 362         av_log(avctx, AV_LOG_ERROR,
 363                "a vbv buffer size is needed, "
 364                "for encoding with a maximum bitrate\n");
 365         return -1;
 366     }
 367
 368     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 369         av_log(avctx, AV_LOG_INFO,
 370                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 371     }
 372
 373     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 374         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 375         return -1;
 376     }
 377
 378     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 379         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
 380         return -1;
 381     }
 382
 383     if (avctx->rc_max_rate &&
 384         avctx->rc_max_rate == avctx->bit_rate &&
 385         avctx->rc_max_rate != avctx->rc_min_rate) {
 386         av_log(avctx, AV_LOG_INFO,
 387                "impossible bitrate constraints, this will fail\n");
 388     }
 389
 390     if (avctx->rc_buffer_size &&
 391         avctx->bit_rate * (int64_t)avctx->time_base.num >
 392             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 393         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 394         return -1;
 395     }
 396
 397     if (!s->fixed_qscale &&
 398         avctx->bit_rate * av_q2d(avctx->time_base) >
 399             avctx->bit_rate_tolerance) {
 400         av_log(avctx, AV_LOG_ERROR,
 401                "bitrate tolerance too small for bitrate\n");
 402         return -1;
 403     }
 404
 405     if (s->avctx->rc_max_rate &&
 406         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 407         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 408          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 409         90000LL * (avctx->rc_buffer_size - 1) >
 410             s->avctx->rc_max_rate * 0xFFFFLL) {
 411         av_log(avctx, AV_LOG_INFO,
 412                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 413                "specified vbv buffer is too large for the given bitrate!\n");
 414     }
 415
 416     if ((s->avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
 417         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 418         s->codec_id != AV_CODEC_ID_FLV1) {
 419         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 420         return -1;
 421     }
 422
 423     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 424         av_log(avctx, AV_LOG_ERROR,
 425                "OBMC is only supported with simple mb decision\n");
 426         return -1;
 427     }
 428
 429     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 430         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 431         return -1;
 432     }
 433
 434     if (s->max_b_frames                    &&
 435         s->codec_id != AV_CODEC_ID_MPEG4      &&
 436         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 437         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 438         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 439         return -1;
 440     }
 441
 442     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 443          s->codec_id == AV_CODEC_ID_H263  ||
 444          s->codec_id == AV_CODEC_ID_H263P) &&
 445         (avctx->sample_aspect_ratio.num > 255 ||
 446          avctx->sample_aspect_ratio.den > 255)) {
 447         av_log(avctx, AV_LOG_ERROR,
 448                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
 449                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 450         return -1;
 451     }
 452
 453     if ((s->avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
 454         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 455         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 456         return -1;
 457     }
 458
 459 #if FF_API_PRIVATE_OPT
 460     FF_DISABLE_DEPRECATION_WARNINGS
 461     if (avctx->mpeg_quant)
 462         s->mpeg_quant = avctx->mpeg_quant;
 463     FF_ENABLE_DEPRECATION_WARNINGS
 464 #endif
 465
 466     // FIXME mpeg2 uses that too
 467     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
 468         av_log(avctx, AV_LOG_ERROR,
 469                "mpeg2 style quantization not supported by codec\n");
 470         return -1;
 471     }
 472
 473     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 474         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 475         return -1;
 476     }
 477
 478     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 479         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 480         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 481         return -1;
 482     }
 483
 484 #if FF_API_PRIVATE_OPT
 485 FF_DISABLE_DEPRECATION_WARNINGS
 486     if (avctx->scenechange_threshold)
 487         s->scenechange_threshold = avctx->scenechange_threshold;
 488 FF_ENABLE_DEPRECATION_WARNINGS
 489 #endif
 490
 491     if (s->scenechange_threshold < 1000000000 &&
 492         (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
 493         av_log(avctx, AV_LOG_ERROR,
 494                "closed gop with scene change detection are not supported yet, "
 495                "set threshold to 1000000000\n");
 496         return -1;
 497     }
 498
 499     if (s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
 500         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 501             av_log(avctx, AV_LOG_ERROR,
 502                   "low delay forcing is only available for mpeg2\n");
 503             return -1;
 504         }
 505         if (s->max_b_frames != 0) {
 506             av_log(avctx, AV_LOG_ERROR,
 507                    "b frames cannot be used with low delay\n");
 508             return -1;
 509         }
 510     }
 511
 512     if (s->q_scale_type == 1) {
 513         if (avctx->qmax > 12) {
 514             av_log(avctx, AV_LOG_ERROR,
 515                    "non linear quant only supports qmax <= 12 currently\n");
 516             return -1;
 517         }
 518     }
 519
 520     if (avctx->slices > 1 &&
 521         (avctx->codec_id == AV_CODEC_ID_FLV1 || avctx->codec_id == AV_CODEC_ID_H261)) {
 522         av_log(avctx, AV_LOG_ERROR, "Multiple slices are not supported by this codec\n");
 523         return AVERROR(EINVAL);
 524     }
 525
 526     if (s->avctx->thread_count > 1         &&
 527         s->codec_id != AV_CODEC_ID_MPEG4      &&
 528         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 529         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 530         (s->codec_id != AV_CODEC_ID_H263P)) {
 531         av_log(avctx, AV_LOG_ERROR,
 532                "multi threaded encoding not supported by codec\n");
 533         return -1;
 534     }
 535
 536     if (s->avctx->thread_count < 1) {
 537         av_log(avctx, AV_LOG_ERROR,
 538                "automatic thread number detection not supported by codec,"
 539                "patch welcome\n");
 540         return -1;
 541     }
 542
 543     if (!avctx->time_base.den || !avctx->time_base.num) {
 544         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 545         return -1;
 546     }
 547
 548 #if FF_API_PRIVATE_OPT
 549 FF_DISABLE_DEPRECATION_WARNINGS
 550     if (avctx->b_frame_strategy)
 551         s->b_frame_strategy = avctx->b_frame_strategy;
 552     if (avctx->b_sensitivity != 40)
 553         s->b_sensitivity = avctx->b_sensitivity;
 554 FF_ENABLE_DEPRECATION_WARNINGS
 555 #endif
 556
 557     if (s->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
 558         av_log(avctx, AV_LOG_INFO,
 559                "notice: b_frame_strategy only affects the first pass\n");
 560         s->b_frame_strategy = 0;
 561     }
 562
 563     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 564     if (i > 1) {
 565         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 566         avctx->time_base.den /= i;
 567         avctx->time_base.num /= i;
 568         //return -1;
 569     }
 570
 571     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 572         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
 573         // (a + x * 3 / 8) / x
 574         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 575         s->inter_quant_bias = 0;
 576     } else {
 577         s->intra_quant_bias = 0;
 578         // (a - x / 4) / x
 579         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 580     }
 581
 582 #if FF_API_QUANT_BIAS
 583 FF_DISABLE_DEPRECATION_WARNINGS
 584     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 585         s->intra_quant_bias = avctx->intra_quant_bias;
 586     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 587         s->inter_quant_bias = avctx->inter_quant_bias;
 588 FF_ENABLE_DEPRECATION_WARNINGS
 589 #endif
 590
 591     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 592         s->avctx->time_base.den > (1 << 16) - 1) {
 593         av_log(avctx, AV_LOG_ERROR,
 594                "timebase %d/%d not supported by MPEG 4 standard, "
 595                "the maximum admitted value for the timebase denominator "
 596                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 597                (1 << 16) - 1);
 598         return -1;
 599     }
 600     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 601
 602     switch (avctx->codec->id) {
 603     case AV_CODEC_ID_MPEG1VIDEO:
 604         s->out_format = FMT_MPEG1;
 605         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
 606         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 607         break;
 608     case AV_CODEC_ID_MPEG2VIDEO:
 609         s->out_format = FMT_MPEG1;
 610         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
 611         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 612         s->rtp_mode   = 1;
 613         break;
 614     case AV_CODEC_ID_MJPEG:
 615         s->out_format = FMT_MJPEG;
 616         s->intra_only = 1; /* force intra only for jpeg */
 617         if (!CONFIG_MJPEG_ENCODER ||
 618             ff_mjpeg_encode_init(s) < 0)
 619             return -1;
 620         avctx->delay = 0;
 621         s->low_delay = 1;
 622         break;
 623     case AV_CODEC_ID_H261:
 624         if (!CONFIG_H261_ENCODER)
 625             return -1;
 626         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 627             av_log(avctx, AV_LOG_ERROR,
 628                    "The specified picture size of %dx%d is not valid for the "
 629                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 630                     s->width, s->height);
 631             return -1;
 632         }
 633         s->out_format = FMT_H261;
 634         avctx->delay  = 0;
 635         s->low_delay  = 1;
 636         s->rtp_mode   = 0; /* Sliced encoding not supported */
 637         break;
 638     case AV_CODEC_ID_H263:
 639         if (!CONFIG_H263_ENCODER)
 640         return -1;
 641         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 642                              s->width, s->height) == 8) {
 643             av_log(avctx, AV_LOG_INFO,
 644                    "The specified picture size of %dx%d is not valid for "
 645                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 646                    "352x288, 704x576, and 1408x1152."
 647                    "Try H.263+.\n", s->width, s->height);
 648             return -1;
 649         }
 650         s->out_format = FMT_H263;
 651         avctx->delay  = 0;
 652         s->low_delay  = 1;
 653         break;
 654     case AV_CODEC_ID_H263P:
 655         s->out_format = FMT_H263;
 656         s->h263_plus  = 1;
 657         /* Fx */
 658         s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
 659         s->modified_quant  = s->h263_aic;
 660         s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 661         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 662
 663         /* /Fx */
 664         /* These are just to be sure */
 665         avctx->delay = 0;
 666         s->low_delay = 1;
 667         break;
 668     case AV_CODEC_ID_FLV1:
 669         s->out_format      = FMT_H263;
 670         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 671         s->unrestricted_mv = 1;
 672         s->rtp_mode  = 0; /* don't allow GOB */
 673         avctx->delay = 0;
 674         s->low_delay = 1;
 675         break;
 676     case AV_CODEC_ID_RV10:
 677         s->out_format = FMT_H263;
 678         avctx->delay  = 0;
 679         s->low_delay  = 1;
 680         break;
 681     case AV_CODEC_ID_RV20:
 682         s->out_format      = FMT_H263;
 683         avctx->delay       = 0;
 684         s->low_delay       = 1;
 685         s->modified_quant  = 1;
 686         s->h263_aic        = 1;
 687         s->h263_plus       = 1;
 688         s->loop_filter     = 1;
 689         s->unrestricted_mv = 0;
 690         break;
 691     case AV_CODEC_ID_MPEG4:
 692         s->out_format      = FMT_H263;
 693         s->h263_pred       = 1;
 694         s->unrestricted_mv = 1;
 695         s->low_delay       = s->max_b_frames ? 0 : 1;
 696         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 697         break;
 698     case AV_CODEC_ID_MSMPEG4V2:
 699         s->out_format      = FMT_H263;
 700         s->h263_pred       = 1;
 701         s->unrestricted_mv = 1;
 702         s->msmpeg4_version = 2;
 703         avctx->delay       = 0;
 704         s->low_delay       = 1;
 705         break;
 706     case AV_CODEC_ID_MSMPEG4V3:
 707         s->out_format        = FMT_H263;
 708         s->h263_pred         = 1;
 709         s->unrestricted_mv   = 1;
 710         s->msmpeg4_version   = 3;
 711         s->flipflop_rounding = 1;
 712         avctx->delay         = 0;
 713         s->low_delay         = 1;
 714         break;
 715     case AV_CODEC_ID_WMV1:
 716         s->out_format        = FMT_H263;
 717         s->h263_pred         = 1;
 718         s->unrestricted_mv   = 1;
 719         s->msmpeg4_version   = 4;
 720         s->flipflop_rounding = 1;
 721         avctx->delay         = 0;
 722         s->low_delay         = 1;
 723         break;
 724     case AV_CODEC_ID_WMV2:
 725         s->out_format        = FMT_H263;
 726         s->h263_pred         = 1;
 727         s->unrestricted_mv   = 1;
 728         s->msmpeg4_version   = 5;
 729         s->flipflop_rounding = 1;
 730         avctx->delay         = 0;
 731         s->low_delay         = 1;
 732         break;
 733     default:
 734         return -1;
 735     }
 736
 737 #if FF_API_PRIVATE_OPT
 738     FF_DISABLE_DEPRECATION_WARNINGS
 739     if (avctx->noise_reduction)
 740         s->noise_reduction = avctx->noise_reduction;
 741     FF_ENABLE_DEPRECATION_WARNINGS
 742 #endif
 743
 744     avctx->has_b_frames = !s->low_delay;
 745
 746     s->encoding = 1;
 747
 748     s->progressive_frame    =
 749     s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
 750                                                 AV_CODEC_FLAG_INTERLACED_ME) ||
 751                                 s->alternate_scan);
 752
 753     /* init */
 754     ff_mpv_idct_init(s);
 755     if (ff_mpv_common_init(s) < 0)
 756         return -1;
 757
 758     if (ARCH_X86)
 759         ff_mpv_encode_init_x86(s);
 760
 761     ff_fdctdsp_init(&s->fdsp, avctx);
 762     ff_me_cmp_init(&s->mecc, avctx);
 763     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 764     ff_pixblockdsp_init(&s->pdsp, avctx);
 765     ff_qpeldsp_init(&s->qdsp);
 766
 767     if (s->msmpeg4_version) {
 768         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 769                           2 * 2 * (MAX_LEVEL + 1) *
 770                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 771     }
 772     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 773
 774     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 775     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 776     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 777     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 778     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 779                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 780     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 781                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 782
 783
 784     if (s->noise_reduction) {
 785         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 786                           2 * 64 * sizeof(uint16_t), fail);
 787     }
 788
 789     if (CONFIG_H263_ENCODER)
 790         ff_h263dsp_init(&s->h263dsp);
 791     if (!s->dct_quantize)
 792         s->dct_quantize = ff_dct_quantize_c;
 793     if (!s->denoise_dct)
 794         s->denoise_dct  = denoise_dct_c;
 795     s->fast_dct_quantize = s->dct_quantize;
 796     if (avctx->trellis)
 797         s->dct_quantize  = dct_quantize_trellis_c;
 798
 799     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 800         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 801
 802     if (s->slice_context_count > 1) {
 803         s->rtp_mode = 1;
 804
 805         if (avctx->codec_id == AV_CODEC_ID_H263 || avctx->codec_id == AV_CODEC_ID_H263P)
 806             s->h263_slice_structured = 1;
 807     }
 808
 809     s->quant_precision = 5;
 810
 811 #if FF_API_PRIVATE_OPT
 812 FF_DISABLE_DEPRECATION_WARNINGS
 813     if (avctx->frame_skip_threshold)
 814         s->frame_skip_threshold = avctx->frame_skip_threshold;
 815     if (avctx->frame_skip_factor)
 816         s->frame_skip_factor = avctx->frame_skip_factor;
 817     if (avctx->frame_skip_exp)
 818         s->frame_skip_exp = avctx->frame_skip_exp;
 819     if (avctx->frame_skip_cmp != FF_CMP_DCTMAX)
 820         s->frame_skip_cmp = avctx->frame_skip_cmp;
 821 FF_ENABLE_DEPRECATION_WARNINGS
 822 #endif
 823
 824     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
 825     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->frame_skip_cmp);
 826
 827     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 828         ff_h261_encode_init(s);
 829     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 830         ff_h263_encode_init(s);
 831     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 832         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
 833             return ret;
 834     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 835         && s->out_format == FMT_MPEG1)
 836         ff_mpeg1_encode_init(s);
 837
 838     /* init q matrix */
 839     for (i = 0; i < 64; i++) {
 840         int j = s->idsp.idct_permutation[i];
 841         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 842             s->mpeg_quant) {
 843             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 844             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 845         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 846             s->intra_matrix[j] =
 847             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 848         } else {
 849             /* mpeg1/2 */
 850             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 851             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 852         }
 853         if (s->avctx->intra_matrix)
 854             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 855         if (s->avctx->inter_matrix)
 856             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 857     }
 858
 859     /* precompute matrix */
 860     /* for mjpeg, we do include qscale in the matrix */
 861     if (s->out_format != FMT_MJPEG) {
 862         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
 863                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 864                           31, 1);
 865         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
 866                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 867                           31, 0);
 868     }
 869
 870     if (ff_rate_control_init(s) < 0)
 871         return -1;
 872
 873 #if FF_API_ERROR_RATE
 874     FF_DISABLE_DEPRECATION_WARNINGS
 875     if (avctx->error_rate)
 876         s->error_rate = avctx->error_rate;
 877     FF_ENABLE_DEPRECATION_WARNINGS;
 878 #endif
 879
 880 #if FF_API_NORMALIZE_AQP
 881     FF_DISABLE_DEPRECATION_WARNINGS
 882     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 883         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 884     FF_ENABLE_DEPRECATION_WARNINGS;
 885 #endif
 886
 887 #if FF_API_MV0
 888     FF_DISABLE_DEPRECATION_WARNINGS
 889     if (avctx->flags & CODEC_FLAG_MV0)
 890         s->mpv_flags |= FF_MPV_FLAG_MV0;
 891     FF_ENABLE_DEPRECATION_WARNINGS
 892 #endif
 893
 894 #if FF_API_MPV_OPT
 895     FF_DISABLE_DEPRECATION_WARNINGS
 896     if (avctx->rc_qsquish != 0.0)
 897         s->rc_qsquish = avctx->rc_qsquish;
 898     if (avctx->rc_qmod_amp != 0.0)
 899         s->rc_qmod_amp = avctx->rc_qmod_amp;
 900     if (avctx->rc_qmod_freq)
 901         s->rc_qmod_freq = avctx->rc_qmod_freq;
 902     if (avctx->rc_buffer_aggressivity != 1.0)
 903         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
 904     if (avctx->rc_initial_cplx != 0.0)
 905         s->rc_initial_cplx = avctx->rc_initial_cplx;
 906     if (avctx->lmin)
 907         s->lmin = avctx->lmin;
 908     if (avctx->lmax)
 909         s->lmax = avctx->lmax;
 910
 911     if (avctx->rc_eq) {
 912         av_freep(&s->rc_eq);
 913         s->rc_eq = av_strdup(avctx->rc_eq);
 914         if (!s->rc_eq)
 915             return AVERROR(ENOMEM);
 916     }
 917     FF_ENABLE_DEPRECATION_WARNINGS
 918 #endif
 919
 920 #if FF_API_PRIVATE_OPT
 921     FF_DISABLE_DEPRECATION_WARNINGS
 922     if (avctx->brd_scale)
 923         s->brd_scale = avctx->brd_scale;
 924
 925     if (avctx->prediction_method)
 926         s->pred = avctx->prediction_method + 1;
 927     FF_ENABLE_DEPRECATION_WARNINGS
 928 #endif
 929
 930     if (s->b_frame_strategy == 2) {
 931         for (i = 0; i < s->max_b_frames + 2; i++) {
 932             s->tmp_frames[i] = av_frame_alloc();
 933             if (!s->tmp_frames[i])
 934                 return AVERROR(ENOMEM);
 935
 936             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 937             s->tmp_frames[i]->width  = s->width  >> s->brd_scale;
 938             s->tmp_frames[i]->height = s->height >> s->brd_scale;
 939
 940             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 941             if (ret < 0)
 942                 return ret;
 943         }
 944     }
 945
 946     cpb_props = ff_add_cpb_side_data(avctx);
 947     if (!cpb_props)
 948         return AVERROR(ENOMEM);
 949     cpb_props->max_bitrate = avctx->rc_max_rate;
 950     cpb_props->min_bitrate = avctx->rc_min_rate;
 951     cpb_props->avg_bitrate = avctx->bit_rate;
 952     cpb_props->buffer_size = avctx->rc_buffer_size;
 953
 954     return 0;
 955 fail:
 956     ff_mpv_encode_end(avctx);
 957     return AVERROR_UNKNOWN;
 958 }
 959
 960 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
 961 {
 962     MpegEncContext *s = avctx->priv_data;
 963     int i;
 964
 965     ff_rate_control_uninit(s);
 966
 967     ff_mpv_common_end(s);
 968     if (CONFIG_MJPEG_ENCODER &&
 969         s->out_format == FMT_MJPEG)
 970         ff_mjpeg_encode_close(s);
 971
 972     av_freep(&avctx->extradata);
 973
 974     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
 975         av_frame_free(&s->tmp_frames[i]);
 976
 977     ff_free_picture_tables(&s->new_picture);
 978     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
 979
 980     av_freep(&s->avctx->stats_out);
 981     av_freep(&s->ac_stats);
 982
 983     av_freep(&s->q_intra_matrix);
 984     av_freep(&s->q_inter_matrix);
 985     av_freep(&s->q_intra_matrix16);
 986     av_freep(&s->q_inter_matrix16);
 987     av_freep(&s->input_picture);
 988     av_freep(&s->reordered_input_picture);
 989     av_freep(&s->dct_offset);
 990
 991     return 0;
 992 }
 993
 994 static int get_sae(uint8_t *src, int ref, int stride)
 995 {
 996     int x,y;
 997     int acc = 0;
 998
 999     for (y = 0; y < 16; y++) {
1000         for (x = 0; x < 16; x++) {
1001             acc += FFABS(src[x + y * stride] - ref);
1002         }
1003     }
1004
1005     return acc;
1006 }
1007
1008 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1009                            uint8_t *ref, int stride)
1010 {
1011     int x, y, w, h;
1012     int acc = 0;
1013
1014     w = s->width  & ~15;
1015     h = s->height & ~15;
1016
1017     for (y = 0; y < h; y += 16) {
1018         for (x = 0; x < w; x += 16) {
1019             int offset = x + y * stride;
1020             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1021                                       stride, 16);
1022             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1023             int sae  = get_sae(src + offset, mean, stride);
1024
1025             acc += sae + 500 < sad;
1026         }
1027     }
1028     return acc;
1029 }
1030
1031 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1032 {
1033     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1034                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1035                             s->mb_stride, s->mb_height, s->b8_stride,
1036                             &s->linesize, &s->uvlinesize);
1037 }
1038
/**
 * Queue one user-supplied frame (or a flush request) into s->input_picture.
 *
 * For a real frame the presentation timestamp is validated or guessed, a
 * free Picture slot is obtained, and the frame data is either referenced
 * directly or copied into an internally allocated picture. The queue is then
 * shifted and the new picture stored at index encoding_delay.
 *
 * @param s       encoder context
 * @param pic_arg frame to queue, or NULL to flush
 * @return 0 on success, a negative value on error
 */
static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
{
    Picture *pic = NULL;
    int64_t pts;
    int i, display_picture_number = 0, ret;
    /* queue slot for the new picture: max_b_frames if set, otherwise
     * 0 in low-delay mode and 1 else */
    int encoding_delay = s->max_b_frames ? s->max_b_frames
                                         : (s->low_delay ? 0 : 1);
    int flush_offset = 1;
    int direct = 1;

    if (pic_arg) {
        pts = pic_arg->pts;
        display_picture_number = s->input_picture_number++;

        if (pts != AV_NOPTS_VALUE) {
            if (s->user_specified_pts != AV_NOPTS_VALUE) {
                int64_t time = pts;
                int64_t last = s->user_specified_pts;

                /* timestamps must be strictly increasing */
                if (time <= last) {
                    av_log(s->avctx, AV_LOG_ERROR,
                           "Error, Invalid timestamp=%"PRId64", "
                           "last=%"PRId64"\n", pts, s->user_specified_pts);
                    return -1;
                }

                /* on the second input frame, remember the distance between
                 * the first two timestamps */
                if (!s->low_delay && display_picture_number == 1)
                    s->dts_delta = time - last;
            }
            s->user_specified_pts = pts;
        } else {
            /* no timestamp supplied: continue from the previous one, or
             * fall back to the frame counter */
            if (s->user_specified_pts != AV_NOPTS_VALUE) {
                s->user_specified_pts =
                pts = s->user_specified_pts + 1;
                av_log(s->avctx, AV_LOG_INFO,
                       "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
                       pts);
            } else {
                pts = display_picture_number;
            }
        }

        /* the input frame can only be referenced directly when it has a
         * buffer reference, its line sizes equal the encoder's, and the
         * picture dimensions are multiples of 16 */
        if (!pic_arg->buf[0] ||
            pic_arg->linesize[0] != s->linesize ||
            pic_arg->linesize[1] != s->uvlinesize ||
            pic_arg->linesize[2] != s->uvlinesize)
            direct = 0;
        if ((s->width & 15) || (s->height & 15))
            direct = 0;

        ff_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
                pic_arg->linesize[1], s->linesize, s->uvlinesize);

        i = ff_find_unused_picture(s->avctx, s->picture, direct);
        if (i < 0)
            return i;

        pic = &s->picture[i];
        pic->reference = 3;

        if (direct) {
            if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
                return ret;
        }
        ret = alloc_picture(s, pic, direct);
        if (ret < 0)
            return ret;

        if (!direct) {
            /* if the caller's data already sits INPLACE_OFFSET bytes into
             * the picture buffers there is nothing to copy */
            if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
                pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
                pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
                // empty
            } else {
                int h_chroma_shift, v_chroma_shift;
                av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
                                                 &h_chroma_shift,
                                                 &v_chroma_shift);

                /* copy the three planes into the internal picture */
                for (i = 0; i < 3; i++) {
                    int src_stride = pic_arg->linesize[i];
                    int dst_stride = i ? s->uvlinesize : s->linesize;
                    int h_shift = i ? h_chroma_shift : 0;
                    int v_shift = i ? v_chroma_shift : 0;
                    int w = s->width  >> h_shift;
                    int h = s->height >> v_shift;
                    uint8_t *src = pic_arg->data[i];
                    uint8_t *dst = pic->f->data[i];

                    /* the offset is only applied when no rate-control
                     * buffer size is configured */
                    if (!s->avctx->rc_buffer_size)
                        dst += INPLACE_OFFSET;

                    if (src_stride == dst_stride)
                        memcpy(dst, src, src_stride * h);
                    else {
                        /* differing strides: copy line by line */
                        int h2 = h;
                        uint8_t *dst2 = dst;
                        while (h2--) {
                            memcpy(dst2, src, w);
                            dst2 += dst_stride;
                            src += src_stride;
                        }
                    }
                    /* dimensions that are not a multiple of 16 get the
                     * bottom edge extended */
                    if ((s->width & 15) || (s->height & 15)) {
                        s->mpvencdsp.draw_edges(dst, dst_stride,
                                                w, h,
                                                16 >> h_shift,
                                                16 >> v_shift,
                                                EDGE_BOTTOM);
                    }
                }
            }
        }
        ret = av_frame_copy_props(pic->f, pic_arg);
        if (ret < 0)
            return ret;

        pic->f->display_picture_number = display_picture_number;
        pic->f->pts = pts; // we set this here to avoid modifying pic_arg
    } else {
        /* Flushing: When we have not received enough input frames,
         * ensure s->input_picture[0] contains the first picture */
        for (flush_offset = 0; flush_offset < encoding_delay + 1; flush_offset++)
            if (s->input_picture[flush_offset])
                break;

        if (flush_offset <= 1)
            flush_offset = 1;
        else
            encoding_delay = encoding_delay - flush_offset + 1;
    }

    /* shift buffer entries */
    for (i = flush_offset; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
        s->input_picture[i - flush_offset] = s->input_picture[i];

    /* pic is NULL when flushing, which empties this slot */
    s->input_picture[encoding_delay] = (Picture*) pic;

    return 0;
}
1179
1180 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1181 {
1182     int x, y, plane;
1183     int score = 0;
1184     int64_t score64 = 0;
1185
1186     for (plane = 0; plane < 3; plane++) {
1187         const int stride = p->f->linesize[plane];
1188         const int bw = plane ? 1 : 2;
1189         for (y = 0; y < s->mb_height * bw; y++) {
1190             for (x = 0; x < s->mb_width * bw; x++) {
1191                 int off = p->shared ? 0 : 16;
1192                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1193                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1194                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1195
1196                 switch (s->frame_skip_exp) {
1197                 case 0: score    =  FFMAX(score, v);          break;
1198                 case 1: score   += FFABS(v);                  break;
1199                 case 2: score   += v * v;                     break;
1200                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1201                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1202                 }
1203             }
1204         }
1205     }
1206
1207     if (score)
1208         score64 = score;
1209
1210     if (score64 < s->frame_skip_threshold)
1211         return 1;
1212     if (score64 < ((s->frame_skip_factor * (int64_t) s->lambda) >> 8))
1213         return 1;
1214     return 0;
1215 }
1216
1217 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1218 {
1219     AVPacket pkt = { 0 };
1220     int ret, got_output;
1221
1222     av_init_packet(&pkt);
1223     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1224     if (ret < 0)
1225         return ret;
1226
1227     ret = pkt.size;
1228     av_packet_unref(&pkt);
1229     return ret;
1230 }
1231
1232 static int estimate_best_b_count(MpegEncContext *s)
1233 {
1234     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1235     AVCodecContext *c = avcodec_alloc_context3(NULL);
1236     const int scale = s->brd_scale;
1237     int i, j, out_size, p_lambda, b_lambda, lambda2;
1238     int64_t best_rd  = INT64_MAX;
1239     int best_b_count = -1;
1240
1241     if (!c)
1242         return AVERROR(ENOMEM);
1243     assert(scale >= 0 && scale <= 3);
1244
1245     //emms_c();
1246     //s->next_picture_ptr->quality;
1247     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1248     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1249     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1250     if (!b_lambda) // FIXME we should do this somewhere else
1251         b_lambda = p_lambda;
1252     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1253                FF_LAMBDA_SHIFT;
1254
1255     c->width        = s->width  >> scale;
1256     c->height       = s->height >> scale;
1257     c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1258     c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1259     c->mb_decision  = s->avctx->mb_decision;
1260     c->me_cmp       = s->avctx->me_cmp;
1261     c->mb_cmp       = s->avctx->mb_cmp;
1262     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1263     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1264     c->time_base    = s->avctx->time_base;
1265     c->max_b_frames = s->max_b_frames;
1266
1267     if (avcodec_open2(c, codec, NULL) < 0)
1268         return -1;
1269
1270     for (i = 0; i < s->max_b_frames + 2; i++) {
1271         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1272                                                 s->next_picture_ptr;
1273
1274         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1275             pre_input = *pre_input_ptr;
1276
1277             if (!pre_input.shared && i) {
1278                 pre_input.f->data[0] += INPLACE_OFFSET;
1279                 pre_input.f->data[1] += INPLACE_OFFSET;
1280                 pre_input.f->data[2] += INPLACE_OFFSET;
1281             }
1282
1283             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1284                                        s->tmp_frames[i]->linesize[0],
1285                                        pre_input.f->data[0],
1286                                        pre_input.f->linesize[0],
1287                                        c->width, c->height);
1288             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1289                                        s->tmp_frames[i]->linesize[1],
1290                                        pre_input.f->data[1],
1291                                        pre_input.f->linesize[1],
1292                                        c->width >> 1, c->height >> 1);
1293             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1294                                        s->tmp_frames[i]->linesize[2],
1295                                        pre_input.f->data[2],
1296                                        pre_input.f->linesize[2],
1297                                        c->width >> 1, c->height >> 1);
1298         }
1299     }
1300
1301     for (j = 0; j < s->max_b_frames + 1; j++) {
1302         int64_t rd = 0;
1303
1304         if (!s->input_picture[j])
1305             break;
1306
1307         c->error[0] = c->error[1] = c->error[2] = 0;
1308
1309         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1310         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1311
1312         out_size = encode_frame(c, s->tmp_frames[0]);
1313
1314         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1315
1316         for (i = 0; i < s->max_b_frames + 1; i++) {
1317             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1318
1319             s->tmp_frames[i + 1]->pict_type = is_p ?
1320                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1321             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1322
1323             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1324
1325             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1326         }
1327
1328         /* get the delayed frames */
1329         while (out_size) {
1330             out_size = encode_frame(c, NULL);
1331             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1332         }
1333
1334         rd += c->error[0] + c->error[1] + c->error[2];
1335
1336         if (rd < best_rd) {
1337             best_rd = rd;
1338             best_b_count = j;
1339         }
1340     }
1341
1342     avcodec_close(c);
1343     av_freep(&c);
1344
1345     return best_b_count;
1346 }
1347
1348 static int select_input_picture(MpegEncContext *s)
1349 {
1350     int i, ret;
1351
1352     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1353         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1354     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1355
1356     /* set next picture type & ordering */
1357     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1358         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1359             !s->next_picture_ptr || s->intra_only) {
1360             s->reordered_input_picture[0] = s->input_picture[0];
1361             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1362             s->reordered_input_picture[0]->f->coded_picture_number =
1363                 s->coded_picture_number++;
1364         } else {
1365             int b_frames = 0;
1366
1367             if (s->frame_skip_threshold || s->frame_skip_factor) {
1368                 if (s->picture_in_gop_number < s->gop_size &&
1369                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1370                     // FIXME check that te gop check above is +-1 correct
1371                     av_frame_unref(s->input_picture[0]->f);
1372
1373                     emms_c();
1374                     ff_vbv_update(s, 0);
1375
1376                     goto no_output_pic;
1377                 }
1378             }
1379
1380             if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1381                 for (i = 0; i < s->max_b_frames + 1; i++) {
1382                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1383
1384                     if (pict_num >= s->rc_context.num_entries)
1385                         break;
1386                     if (!s->input_picture[i]) {
1387                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1388                         break;
1389                     }
1390
1391                     s->input_picture[i]->f->pict_type =
1392                         s->rc_context.entry[pict_num].new_pict_type;
1393                 }
1394             }
1395
1396             if (s->b_frame_strategy == 0) {
1397                 b_frames = s->max_b_frames;
1398                 while (b_frames && !s->input_picture[b_frames])
1399                     b_frames--;
1400             } else if (s->b_frame_strategy == 1) {
1401                 for (i = 1; i < s->max_b_frames + 1; i++) {
1402                     if (s->input_picture[i] &&
1403                         s->input_picture[i]->b_frame_score == 0) {
1404                         s->input_picture[i]->b_frame_score =
1405                             get_intra_count(s,
1406                                             s->input_picture[i    ]->f->data[0],
1407                                             s->input_picture[i - 1]->f->data[0],
1408                                             s->linesize) + 1;
1409                     }
1410                 }
1411                 for (i = 0; i < s->max_b_frames + 1; i++) {
1412                     if (!s->input_picture[i] ||
1413                         s->input_picture[i]->b_frame_score - 1 >
1414                             s->mb_num / s->b_sensitivity)
1415                         break;
1416                 }
1417
1418                 b_frames = FFMAX(0, i - 1);
1419
1420                 /* reset scores */
1421                 for (i = 0; i < b_frames + 1; i++) {
1422                     s->input_picture[i]->b_frame_score = 0;
1423                 }
1424             } else if (s->b_frame_strategy == 2) {
1425                 b_frames = estimate_best_b_count(s);
1426             }
1427
1428             emms_c();
1429
1430             for (i = b_frames - 1; i >= 0; i--) {
1431                 int type = s->input_picture[i]->f->pict_type;
1432                 if (type && type != AV_PICTURE_TYPE_B)
1433                     b_frames = i;
1434             }
1435             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1436                 b_frames == s->max_b_frames) {
1437                 av_log(s->avctx, AV_LOG_ERROR,
1438                        "warning, too many b frames in a row\n");
1439             }
1440
1441             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1442                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1443                     s->gop_size > s->picture_in_gop_number) {
1444                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1445                 } else {
1446                     if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1447                         b_frames = 0;
1448                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1449                 }
1450             }
1451
1452             if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1453                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1454                 b_frames--;
1455
1456             s->reordered_input_picture[0] = s->input_picture[b_frames];
1457             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1458                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1459             s->reordered_input_picture[0]->f->coded_picture_number =
1460                 s->coded_picture_number++;
1461             for (i = 0; i < b_frames; i++) {
1462                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1463                 s->reordered_input_picture[i + 1]->f->pict_type =
1464                     AV_PICTURE_TYPE_B;
1465                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1466                     s->coded_picture_number++;
1467             }
1468         }
1469     }
1470 no_output_pic:
1471     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1472
1473     if (s->reordered_input_picture[0]) {
1474         s->reordered_input_picture[0]->reference =
1475            s->reordered_input_picture[0]->f->pict_type !=
1476                AV_PICTURE_TYPE_B ? 3 : 0;
1477
1478         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1479             return ret;
1480
1481         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1482             // input is a shared pix, so we can't modifiy it -> alloc a new
1483             // one & ensure that the shared one is reuseable
1484
1485             Picture *pic;
1486             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1487             if (i < 0)
1488                 return i;
1489             pic = &s->picture[i];
1490
1491             pic->reference = s->reordered_input_picture[0]->reference;
1492             if (alloc_picture(s, pic, 0) < 0) {
1493                 return -1;
1494             }
1495
1496             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1497             if (ret < 0)
1498                 return ret;
1499
1500             /* mark us unused / free shared pic */
1501             av_frame_unref(s->reordered_input_picture[0]->f);
1502             s->reordered_input_picture[0]->shared = 0;
1503
1504             s->current_picture_ptr = pic;
1505         } else {
1506             // input is not a shared pix -> reuse buffer for current_pix
1507             s->current_picture_ptr = s->reordered_input_picture[0];
1508             for (i = 0; i < 4; i++) {
1509                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1510             }
1511         }
1512         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1513         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1514                                        s->current_picture_ptr)) < 0)
1515             return ret;
1516
1517         s->picture_number = s->new_picture.f->display_picture_number;
1518     }
1519     return 0;
1520 }
1521
1522 static void frame_end(MpegEncContext *s)
1523 {
1524     int i;
1525
1526     if (s->unrestricted_mv &&
1527         s->current_picture.reference &&
1528         !s->intra_only) {
1529         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1530         int hshift = desc->log2_chroma_w;
1531         int vshift = desc->log2_chroma_h;
1532         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1533                                 s->h_edge_pos, s->v_edge_pos,
1534                                 EDGE_WIDTH, EDGE_WIDTH,
1535                                 EDGE_TOP | EDGE_BOTTOM);
1536         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1537                                 s->h_edge_pos >> hshift,
1538                                 s->v_edge_pos >> vshift,
1539                                 EDGE_WIDTH >> hshift,
1540                                 EDGE_WIDTH >> vshift,
1541                                 EDGE_TOP | EDGE_BOTTOM);
1542         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1543                                 s->h_edge_pos >> hshift,
1544                                 s->v_edge_pos >> vshift,
1545                                 EDGE_WIDTH >> hshift,
1546                                 EDGE_WIDTH >> vshift,
1547                                 EDGE_TOP | EDGE_BOTTOM);
1548     }
1549
1550     emms_c();
1551
1552     s->last_pict_type                 = s->pict_type;
1553     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1554     if (s->pict_type!= AV_PICTURE_TYPE_B)
1555         s->last_non_b_pict_type = s->pict_type;
1556
1557     if (s->encoding) {
1558         /* release non-reference frames */
1559         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1560             if (!s->picture[i].reference)
1561                 ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1562         }
1563     }
1564
1565 #if FF_API_CODED_FRAME
1566 FF_DISABLE_DEPRECATION_WARNINGS
1567     av_frame_copy_props(s->avctx->coded_frame, s->current_picture.f);
1568 FF_ENABLE_DEPRECATION_WARNINGS
1569 #endif
1570 #if FF_API_ERROR_FRAME
1571 FF_DISABLE_DEPRECATION_WARNINGS
1572     memcpy(s->current_picture.f->error, s->current_picture.encoding_error,
1573            sizeof(s->current_picture.encoding_error));
1574 FF_ENABLE_DEPRECATION_WARNINGS
1575 #endif
1576 }
1577
1578 static void update_noise_reduction(MpegEncContext *s)
1579 {
1580     int intra, i;
1581
1582     for (intra = 0; intra < 2; intra++) {
1583         if (s->dct_count[intra] > (1 << 16)) {
1584             for (i = 0; i < 64; i++) {
1585                 s->dct_error_sum[intra][i] >>= 1;
1586             }
1587             s->dct_count[intra] >>= 1;
1588         }
1589
1590         for (i = 0; i < 64; i++) {
1591             s->dct_offset[intra][i] = (s->noise_reduction *
1592                                        s->dct_count[intra] +
1593                                        s->dct_error_sum[intra][i] / 2) /
1594                                       (s->dct_error_sum[intra][i] + 1);
1595         }
1596     }
1597 }
1598
1599 static int frame_start(MpegEncContext *s)
1600 {
1601     int ret;
1602
1603     /* mark & release old frames */
1604     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1605         s->last_picture_ptr != s->next_picture_ptr &&
1606         s->last_picture_ptr->f->buf[0]) {
1607         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1608     }
1609
1610     s->current_picture_ptr->f->pict_type = s->pict_type;
1611     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1612
1613     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1614     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1615                                    s->current_picture_ptr)) < 0)
1616         return ret;
1617
1618     if (s->pict_type != AV_PICTURE_TYPE_B) {
1619         s->last_picture_ptr = s->next_picture_ptr;
1620         if (!s->droppable)
1621             s->next_picture_ptr = s->current_picture_ptr;
1622     }
1623
1624     if (s->last_picture_ptr) {
1625         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1626         if (s->last_picture_ptr->f->buf[0] &&
1627             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1628                                        s->last_picture_ptr)) < 0)
1629             return ret;
1630     }
1631     if (s->next_picture_ptr) {
1632         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1633         if (s->next_picture_ptr->f->buf[0] &&
1634             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1635                                        s->next_picture_ptr)) < 0)
1636             return ret;
1637     }
1638
1639     if (s->picture_structure!= PICT_FRAME) {
1640         int i;
1641         for (i = 0; i < 4; i++) {
1642             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1643                 s->current_picture.f->data[i] +=
1644                     s->current_picture.f->linesize[i];
1645             }
1646             s->current_picture.f->linesize[i] *= 2;
1647             s->last_picture.f->linesize[i]    *= 2;
1648             s->next_picture.f->linesize[i]    *= 2;
1649         }
1650     }
1651
1652     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1653         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1654         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1655     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1656         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1657         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1658     } else {
1659         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1660         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1661     }
1662
1663     if (s->dct_error_sum) {
1664         assert(s->noise_reduction && s->encoding);
1665         update_noise_reduction(s);
1666     }
1667
1668     return 0;
1669 }
1670
1671 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1672                           const AVFrame *pic_arg, int *got_packet)
1673 {
1674     MpegEncContext *s = avctx->priv_data;
1675     int i, stuffing_count, ret;
1676     int context_count = s->slice_context_count;
1677
1678     s->picture_in_gop_number++;
1679
1680     if (load_input_picture(s, pic_arg) < 0)
1681         return -1;
1682
1683     if (select_input_picture(s) < 0) {
1684         return -1;
1685     }
1686
1687     /* output? */
1688     if (s->new_picture.f->data[0]) {
1689         uint8_t *sd;
1690         if (!pkt->data &&
1691             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1692             return ret;
1693         if (s->mb_info) {
1694             s->mb_info_ptr = av_packet_new_side_data(pkt,
1695                                  AV_PKT_DATA_H263_MB_INFO,
1696                                  s->mb_width*s->mb_height*12);
1697             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1698         }
1699
1700         for (i = 0; i < context_count; i++) {
1701             int start_y = s->thread_context[i]->start_mb_y;
1702             int   end_y = s->thread_context[i]->  end_mb_y;
1703             int h       = s->mb_height;
1704             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1705             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1706
1707             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1708         }
1709
1710         s->pict_type = s->new_picture.f->pict_type;
1711         //emms_c();
1712         ret = frame_start(s);
1713         if (ret < 0)
1714             return ret;
1715 vbv_retry:
1716         if (encode_picture(s, s->picture_number) < 0)
1717             return -1;
1718
1719 #if FF_API_STAT_BITS
1720 FF_DISABLE_DEPRECATION_WARNINGS
1721         avctx->header_bits = s->header_bits;
1722         avctx->mv_bits     = s->mv_bits;
1723         avctx->misc_bits   = s->misc_bits;
1724         avctx->i_tex_bits  = s->i_tex_bits;
1725         avctx->p_tex_bits  = s->p_tex_bits;
1726         avctx->i_count     = s->i_count;
1727         // FIXME f/b_count in avctx
1728         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1729         avctx->skip_count  = s->skip_count;
1730 FF_ENABLE_DEPRECATION_WARNINGS
1731 #endif
1732
1733         frame_end(s);
1734
1735         sd = av_packet_new_side_data(pkt, AV_PKT_DATA_QUALITY_FACTOR,
1736                                      sizeof(int));
1737         if (!sd)
1738             return AVERROR(ENOMEM);
1739         *(int *)sd = s->current_picture.f->quality;
1740
1741         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1742             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1743
1744         if (avctx->rc_buffer_size) {
1745             RateControlContext *rcc = &s->rc_context;
1746             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1747
1748             if (put_bits_count(&s->pb) > max_size &&
1749                 s->lambda < s->lmax) {
1750                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1751                                        (s->qscale + 1) / s->qscale);
1752                 if (s->adaptive_quant) {
1753                     int i;
1754                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1755                         s->lambda_table[i] =
1756                             FFMAX(s->lambda_table[i] + 1,
1757                                   s->lambda_table[i] * (s->qscale + 1) /
1758                                   s->qscale);
1759                 }
1760                 s->mb_skipped = 0;        // done in frame_start()
1761                 // done in encode_picture() so we must undo it
1762                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1763                     if (s->flipflop_rounding          ||
1764                         s->codec_id == AV_CODEC_ID_H263P ||
1765                         s->codec_id == AV_CODEC_ID_MPEG4)
1766                         s->no_rounding ^= 1;
1767                 }
1768                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1769                     s->time_base       = s->last_time_base;
1770                     s->last_non_b_time = s->time - s->pp_time;
1771                 }
1772                 for (i = 0; i < context_count; i++) {
1773                     PutBitContext *pb = &s->thread_context[i]->pb;
1774                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1775                 }
1776                 goto vbv_retry;
1777             }
1778
1779             assert(s->avctx->rc_max_rate);
1780         }
1781
1782         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1783             ff_write_pass1_stats(s);
1784
1785         for (i = 0; i < 4; i++) {
1786             s->current_picture_ptr->encoding_error[i] = s->current_picture.encoding_error[i];
1787             avctx->error[i] += s->current_picture_ptr->encoding_error[i];
1788         }
1789
1790         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1791             assert(put_bits_count(&s->pb) == s->header_bits + s->mv_bits +
1792                                              s->misc_bits + s->i_tex_bits +
1793                                              s->p_tex_bits);
1794         flush_put_bits(&s->pb);
1795         s->frame_bits  = put_bits_count(&s->pb);
1796
1797         stuffing_count = ff_vbv_update(s, s->frame_bits);
1798         if (stuffing_count) {
1799             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1800                     stuffing_count + 50) {
1801                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1802                 return -1;
1803             }
1804
1805             switch (s->codec_id) {
1806             case AV_CODEC_ID_MPEG1VIDEO:
1807             case AV_CODEC_ID_MPEG2VIDEO:
1808                 while (stuffing_count--) {
1809                     put_bits(&s->pb, 8, 0);
1810                 }
1811             break;
1812             case AV_CODEC_ID_MPEG4:
1813                 put_bits(&s->pb, 16, 0);
1814                 put_bits(&s->pb, 16, 0x1C3);
1815                 stuffing_count -= 4;
1816                 while (stuffing_count--) {
1817                     put_bits(&s->pb, 8, 0xFF);
1818                 }
1819             break;
1820             default:
1821                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1822             }
1823             flush_put_bits(&s->pb);
1824             s->frame_bits  = put_bits_count(&s->pb);
1825         }
1826
1827         /* update mpeg1/2 vbv_delay for CBR */
1828         if (s->avctx->rc_max_rate                          &&
1829             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1830             s->out_format == FMT_MPEG1                     &&
1831             90000LL * (avctx->rc_buffer_size - 1) <=
1832                 s->avctx->rc_max_rate * 0xFFFFLL) {
1833             AVCPBProperties *props;
1834             size_t props_size;
1835
1836             int vbv_delay, min_delay;
1837             double inbits  = s->avctx->rc_max_rate *
1838                              av_q2d(s->avctx->time_base);
1839             int    minbits = s->frame_bits - 8 *
1840                              (s->vbv_delay_ptr - s->pb.buf - 1);
1841             double bits    = s->rc_context.buffer_index + minbits - inbits;
1842
1843             if (bits < 0)
1844                 av_log(s->avctx, AV_LOG_ERROR,
1845                        "Internal error, negative bits\n");
1846
1847             assert(s->repeat_first_field == 0);
1848
1849             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1850             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1851                         s->avctx->rc_max_rate;
1852
1853             vbv_delay = FFMAX(vbv_delay, min_delay);
1854
1855             assert(vbv_delay < 0xFFFF);
1856
1857             s->vbv_delay_ptr[0] &= 0xF8;
1858             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1859             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1860             s->vbv_delay_ptr[2] &= 0x07;
1861             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1862
1863             props = av_cpb_properties_alloc(&props_size);
1864             if (!props)
1865                 return AVERROR(ENOMEM);
1866             props->vbv_delay = vbv_delay * 300;
1867
1868             ret = av_packet_add_side_data(pkt, AV_PKT_DATA_CPB_PROPERTIES,
1869                                           (uint8_t*)props, props_size);
1870             if (ret < 0) {
1871                 av_freep(&props);
1872                 return ret;
1873             }
1874
1875 #if FF_API_VBV_DELAY
1876 FF_DISABLE_DEPRECATION_WARNINGS
1877             avctx->vbv_delay     = vbv_delay * 300;
1878 FF_ENABLE_DEPRECATION_WARNINGS
1879 #endif
1880         }
1881         s->total_bits     += s->frame_bits;
1882 #if FF_API_STAT_BITS
1883 FF_DISABLE_DEPRECATION_WARNINGS
1884         avctx->frame_bits  = s->frame_bits;
1885 FF_ENABLE_DEPRECATION_WARNINGS
1886 #endif
1887
1888
1889         pkt->pts = s->current_picture.f->pts;
1890         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1891             if (!s->current_picture.f->coded_picture_number)
1892                 pkt->dts = pkt->pts - s->dts_delta;
1893             else
1894                 pkt->dts = s->reordered_pts;
1895             s->reordered_pts = pkt->pts;
1896         } else
1897             pkt->dts = pkt->pts;
1898         if (s->current_picture.f->key_frame)
1899             pkt->flags |= AV_PKT_FLAG_KEY;
1900         if (s->mb_info)
1901             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1902     } else {
1903         s->frame_bits = 0;
1904     }
1905     assert((s->frame_bits & 7) == 0);
1906
1907     pkt->size = s->frame_bits / 8;
1908     *got_packet = !!pkt->size;
1909     return 0;
1910 }
1911
1912 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1913                                                 int n, int threshold)
1914 {
1915     static const char tab[64] = {
1916         3, 2, 2, 1, 1, 1, 1, 1,
1917         1, 1, 1, 1, 1, 1, 1, 1,
1918         1, 1, 1, 1, 1, 1, 1, 1,
1919         0, 0, 0, 0, 0, 0, 0, 0,
1920         0, 0, 0, 0, 0, 0, 0, 0,
1921         0, 0, 0, 0, 0, 0, 0, 0,
1922         0, 0, 0, 0, 0, 0, 0, 0,
1923         0, 0, 0, 0, 0, 0, 0, 0
1924     };
1925     int score = 0;
1926     int run = 0;
1927     int i;
1928     int16_t *block = s->block[n];
1929     const int last_index = s->block_last_index[n];
1930     int skip_dc;
1931
1932     if (threshold < 0) {
1933         skip_dc = 0;
1934         threshold = -threshold;
1935     } else
1936         skip_dc = 1;
1937
1938     /* Are all we could set to zero already zero? */
1939     if (last_index <= skip_dc - 1)
1940         return;
1941
1942     for (i = 0; i <= last_index; i++) {
1943         const int j = s->intra_scantable.permutated[i];
1944         const int level = FFABS(block[j]);
1945         if (level == 1) {
1946             if (skip_dc && i == 0)
1947                 continue;
1948             score += tab[run];
1949             run = 0;
1950         } else if (level > 1) {
1951             return;
1952         } else {
1953             run++;
1954         }
1955     }
1956     if (score >= threshold)
1957         return;
1958     for (i = skip_dc; i <= last_index; i++) {
1959         const int j = s->intra_scantable.permutated[i];
1960         block[j] = 0;
1961     }
1962     if (block[0])
1963         s->block_last_index[n] = 0;
1964     else
1965         s->block_last_index[n] = -1;
1966 }
1967
1968 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1969                                int last_index)
1970 {
1971     int i;
1972     const int maxlevel = s->max_qcoeff;
1973     const int minlevel = s->min_qcoeff;
1974     int overflow = 0;
1975
1976     if (s->mb_intra) {
1977         i = 1; // skip clipping of intra dc
1978     } else
1979         i = 0;
1980
1981     for (; i <= last_index; i++) {
1982         const int j = s->intra_scantable.permutated[i];
1983         int level = block[j];
1984
1985         if (level > maxlevel) {
1986             level = maxlevel;
1987             overflow++;
1988         } else if (level < minlevel) {
1989             level = minlevel;
1990             overflow++;
1991         }
1992
1993         block[j] = level;
1994     }
1995
1996     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1997         av_log(s->avctx, AV_LOG_INFO,
1998                "warning, clipping %d dct coefficients to %d..%d\n",
1999                overflow, minlevel, maxlevel);
2000 }
2001
/**
 * Fill weight[64] for an 8x8 pixel block.
 *
 * For every pixel the neighbourhood of up to 3x3 pixels (cut off at the
 * block border) is examined and the weight is set to
 * 36 * sqrt(count * sum(v^2) - sum(v)^2) / count, where count is the number
 * of neighbourhood pixels actually inside the block.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;
    // FIXME optimize
    for (row = 0; row < 8; row++) {
        /* vertical extent of the window, clamped to the block */
        const int top    = row - 1 > 0 ? row - 1 : 0;
        const int bottom = 8 > row + 2 ? row + 2 : 8;

        for (col = 0; col < 8; col++) {
            /* horizontal extent of the window, clamped to the block */
            const int left  = col - 1 > 0 ? col - 1 : 0;
            const int right = 8 > col + 2 ? col + 2 : 8;
            int sum   = 0;
            int sqr   = 0;
            int count = 0;
            int wy, wx;

            for (wy = top; wy < bottom; wy++) {
                const uint8_t *line = ptr + wy * stride;

                for (wx = left; wx < right; wx++) {
                    int v = line[wx];
                    sum += v;
                    sqr += v * v;
                    count++;
                }
            }
            weight[col + 8 * row] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
2025
2026 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2027                                                 int motion_x, int motion_y,
2028                                                 int mb_block_height,
2029                                                 int mb_block_count)
2030 {
2031     int16_t weight[8][64];
2032     int16_t orig[8][64];
2033     const int mb_x = s->mb_x;
2034     const int mb_y = s->mb_y;
2035     int i;
2036     int skip_dct[8];
2037     int dct_offset = s->linesize * 8; // default for progressive frames
2038     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2039     ptrdiff_t wrap_y, wrap_c;
2040
2041     for (i = 0; i < mb_block_count; i++)
2042         skip_dct[i] = s->skipdct;
2043
2044     if (s->adaptive_quant) {
2045         const int last_qp = s->qscale;
2046         const int mb_xy = mb_x + mb_y * s->mb_stride;
2047
2048         s->lambda = s->lambda_table[mb_xy];
2049         update_qscale(s);
2050
2051         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2052             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2053             s->dquant = s->qscale - last_qp;
2054
2055             if (s->out_format == FMT_H263) {
2056                 s->dquant = av_clip(s->dquant, -2, 2);
2057
2058                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2059                     if (!s->mb_intra) {
2060                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2061                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2062                                 s->dquant = 0;
2063                         }
2064                         if (s->mv_type == MV_TYPE_8X8)
2065                             s->dquant = 0;
2066                     }
2067                 }
2068             }
2069         }
2070         ff_set_qscale(s, last_qp + s->dquant);
2071     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2072         ff_set_qscale(s, s->qscale + s->dquant);
2073
2074     wrap_y = s->linesize;
2075     wrap_c = s->uvlinesize;
2076     ptr_y  = s->new_picture.f->data[0] +
2077              (mb_y * 16 * wrap_y)              + mb_x * 16;
2078     ptr_cb = s->new_picture.f->data[1] +
2079              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
2080     ptr_cr = s->new_picture.f->data[2] +
2081              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
2082
2083     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
2084         uint8_t *ebuf = s->sc.edge_emu_buffer + 32;
2085         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2086                                  wrap_y, wrap_y,
2087                                  16, 16, mb_x * 16, mb_y * 16,
2088                                  s->width, s->height);
2089         ptr_y = ebuf;
2090         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
2091                                  wrap_c, wrap_c,
2092                                  8, mb_block_height, mb_x * 8, mb_y * 8,
2093                                  s->width >> 1, s->height >> 1);
2094         ptr_cb = ebuf + 18 * wrap_y;
2095         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
2096                                  wrap_c, wrap_c,
2097                                  8, mb_block_height, mb_x * 8, mb_y * 8,
2098                                  s->width >> 1, s->height >> 1);
2099         ptr_cr = ebuf + 18 * wrap_y + 8;
2100     }
2101
2102     if (s->mb_intra) {
2103         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2104             int progressive_score, interlaced_score;
2105
2106             s->interlaced_dct = 0;
2107             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2108                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2109                                                      NULL, wrap_y, 8) - 400;
2110
2111             if (progressive_score > 0) {
2112                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2113                                                         NULL, wrap_y * 2, 8) +
2114                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2115                                                         NULL, wrap_y * 2, 8);
2116                 if (progressive_score > interlaced_score) {
2117                     s->interlaced_dct = 1;
2118
2119                     dct_offset = wrap_y;
2120                     wrap_y <<= 1;
2121                     if (s->chroma_format == CHROMA_422)
2122                         wrap_c <<= 1;
2123                 }
2124             }
2125         }
2126
2127         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2128         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2129         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2130         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2131
2132         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2133             skip_dct[4] = 1;
2134             skip_dct[5] = 1;
2135         } else {
2136             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2137             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2138             if (!s->chroma_y_shift) { /* 422 */
2139                 s->pdsp.get_pixels(s->block[6],
2140                                    ptr_cb + (dct_offset >> 1), wrap_c);
2141                 s->pdsp.get_pixels(s->block[7],
2142                                    ptr_cr + (dct_offset >> 1), wrap_c);
2143             }
2144         }
2145     } else {
2146         op_pixels_func (*op_pix)[4];
2147         qpel_mc_func (*op_qpix)[16];
2148         uint8_t *dest_y, *dest_cb, *dest_cr;
2149
2150         dest_y  = s->dest[0];
2151         dest_cb = s->dest[1];
2152         dest_cr = s->dest[2];
2153
2154         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2155             op_pix  = s->hdsp.put_pixels_tab;
2156             op_qpix = s->qdsp.put_qpel_pixels_tab;
2157         } else {
2158             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2159             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2160         }
2161
2162         if (s->mv_dir & MV_DIR_FORWARD) {
2163             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2164                           s->last_picture.f->data,
2165                           op_pix, op_qpix);
2166             op_pix  = s->hdsp.avg_pixels_tab;
2167             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2168         }
2169         if (s->mv_dir & MV_DIR_BACKWARD) {
2170             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2171                           s->next_picture.f->data,
2172                           op_pix, op_qpix);
2173         }
2174
2175         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2176             int progressive_score, interlaced_score;
2177
2178             s->interlaced_dct = 0;
2179             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2180                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2181                                                      ptr_y + wrap_y * 8,
2182                                                      wrap_y, 8) - 400;
2183
2184             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2185                 progressive_score -= 400;
2186
2187             if (progressive_score > 0) {
2188                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2189                                                         wrap_y * 2, 8) +
2190                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2191                                                         ptr_y + wrap_y,
2192                                                         wrap_y * 2, 8);
2193
2194                 if (progressive_score > interlaced_score) {
2195                     s->interlaced_dct = 1;
2196
2197                     dct_offset = wrap_y;
2198                     wrap_y <<= 1;
2199                     if (s->chroma_format == CHROMA_422)
2200                         wrap_c <<= 1;
2201                 }
2202             }
2203         }
2204
2205         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2206         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2207         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2208                             dest_y + dct_offset, wrap_y);
2209         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2210                             dest_y + dct_offset + 8, wrap_y);
2211
2212         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2213             skip_dct[4] = 1;
2214             skip_dct[5] = 1;
2215         } else {
2216             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2217             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2218             if (!s->chroma_y_shift) { /* 422 */
2219                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2220                                     dest_cb + (dct_offset >> 1), wrap_c);
2221                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2222                                     dest_cr + (dct_offset >> 1), wrap_c);
2223             }
2224         }
2225         /* pre quantization */
2226         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2227                 2 * s->qscale * s->qscale) {
2228             // FIXME optimize
2229             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2230                 skip_dct[0] = 1;
2231             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2232                 skip_dct[1] = 1;
2233             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2234                                wrap_y, 8) < 20 * s->qscale)
2235                 skip_dct[2] = 1;
2236             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2237                                wrap_y, 8) < 20 * s->qscale)
2238                 skip_dct[3] = 1;
2239             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2240                 skip_dct[4] = 1;
2241             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2242                 skip_dct[5] = 1;
2243             if (!s->chroma_y_shift) { /* 422 */
2244                 if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2245                                    dest_cb + (dct_offset >> 1),
2246                                    wrap_c, 8) < 20 * s->qscale)
2247                     skip_dct[6] = 1;
2248                 if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2249                                    dest_cr + (dct_offset >> 1),
2250                                    wrap_c, 8) < 20 * s->qscale)
2251                     skip_dct[7] = 1;
2252             }
2253         }
2254     }
2255
2256     if (s->quantizer_noise_shaping) {
2257         if (!skip_dct[0])
2258             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2259         if (!skip_dct[1])
2260             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2261         if (!skip_dct[2])
2262             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2263         if (!skip_dct[3])
2264             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2265         if (!skip_dct[4])
2266             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2267         if (!skip_dct[5])
2268             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2269         if (!s->chroma_y_shift) { /* 422 */
2270             if (!skip_dct[6])
2271                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2272                                   wrap_c);
2273             if (!skip_dct[7])
2274                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2275                                   wrap_c);
2276         }
2277         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2278     }
2279
2280     /* DCT & quantize */
2281     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2282     {
2283         for (i = 0; i < mb_block_count; i++) {
2284             if (!skip_dct[i]) {
2285                 int overflow;
2286                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2287                 // FIXME we could decide to change to quantizer instead of
2288                 // clipping
2289                 // JS: I don't think that would be a good idea it could lower
2290                 //     quality instead of improve it. Just INTRADC clipping
2291                 //     deserves changes in quantizer
2292                 if (overflow)
2293                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2294             } else
2295                 s->block_last_index[i] = -1;
2296         }
2297         if (s->quantizer_noise_shaping) {
2298             for (i = 0; i < mb_block_count; i++) {
2299                 if (!skip_dct[i]) {
2300                     s->block_last_index[i] =
2301                         dct_quantize_refine(s, s->block[i], weight[i],
2302                                             orig[i], i, s->qscale);
2303                 }
2304             }
2305         }
2306
2307         if (s->luma_elim_threshold && !s->mb_intra)
2308             for (i = 0; i < 4; i++)
2309                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2310         if (s->chroma_elim_threshold && !s->mb_intra)
2311             for (i = 4; i < mb_block_count; i++)
2312                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2313
2314         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2315             for (i = 0; i < mb_block_count; i++) {
2316                 if (s->block_last_index[i] == -1)
2317                     s->coded_score[i] = INT_MAX / 256;
2318             }
2319         }
2320     }
2321
2322     if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2323         s->block_last_index[4] =
2324         s->block_last_index[5] = 0;
2325         s->block[4][0] =
2326         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2327     }
2328
2329     // non c quantize code returns incorrect block_last_index FIXME
2330     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2331         for (i = 0; i < mb_block_count; i++) {
2332             int j;
2333             if (s->block_last_index[i] > 0) {
2334                 for (j = 63; j > 0; j--) {
2335                     if (s->block[i][s->intra_scantable.permutated[j]])
2336                         break;
2337                 }
2338                 s->block_last_index[i] = j;
2339             }
2340         }
2341     }
2342
2343     /* huffman encode */
2344     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2345     case AV_CODEC_ID_MPEG1VIDEO:
2346     case AV_CODEC_ID_MPEG2VIDEO:
2347         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2348             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2349         break;
2350     case AV_CODEC_ID_MPEG4:
2351         if (CONFIG_MPEG4_ENCODER)
2352             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2353         break;
2354     case AV_CODEC_ID_MSMPEG4V2:
2355     case AV_CODEC_ID_MSMPEG4V3:
2356     case AV_CODEC_ID_WMV1:
2357         if (CONFIG_MSMPEG4_ENCODER)
2358             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2359         break;
2360     case AV_CODEC_ID_WMV2:
2361         if (CONFIG_WMV2_ENCODER)
2362             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2363         break;
2364     case AV_CODEC_ID_H261:
2365         if (CONFIG_H261_ENCODER)
2366             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2367         break;
2368     case AV_CODEC_ID_H263:
2369     case AV_CODEC_ID_H263P:
2370     case AV_CODEC_ID_FLV1:
2371     case AV_CODEC_ID_RV10:
2372     case AV_CODEC_ID_RV20:
2373         if (CONFIG_H263_ENCODER)
2374             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2375         break;
2376     case AV_CODEC_ID_MJPEG:
2377         if (CONFIG_MJPEG_ENCODER)
2378             ff_mjpeg_encode_mb(s, s->block);
2379         break;
2380     default:
2381         assert(0);
2382     }
2383 }
2384
2385 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2386 {
2387     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2388     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2389 }
2390
2391 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2392     int i;
2393
2394     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2395
2396     /* mpeg1 */
2397     d->mb_skip_run= s->mb_skip_run;
2398     for(i=0; i<3; i++)
2399         d->last_dc[i] = s->last_dc[i];
2400
2401     /* statistics */
2402     d->mv_bits= s->mv_bits;
2403     d->i_tex_bits= s->i_tex_bits;
2404     d->p_tex_bits= s->p_tex_bits;
2405     d->i_count= s->i_count;
2406     d->f_count= s->f_count;
2407     d->b_count= s->b_count;
2408     d->skip_count= s->skip_count;
2409     d->misc_bits= s->misc_bits;
2410     d->last_bits= 0;
2411
2412     d->mb_skipped= 0;
2413     d->qscale= s->qscale;
2414     d->dquant= s->dquant;
2415
2416     d->esc3_level_length= s->esc3_level_length;
2417 }
2418
2419 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2420     int i;
2421
2422     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2423     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2424
2425     /* mpeg1 */
2426     d->mb_skip_run= s->mb_skip_run;
2427     for(i=0; i<3; i++)
2428         d->last_dc[i] = s->last_dc[i];
2429
2430     /* statistics */
2431     d->mv_bits= s->mv_bits;
2432     d->i_tex_bits= s->i_tex_bits;
2433     d->p_tex_bits= s->p_tex_bits;
2434     d->i_count= s->i_count;
2435     d->f_count= s->f_count;
2436     d->b_count= s->b_count;
2437     d->skip_count= s->skip_count;
2438     d->misc_bits= s->misc_bits;
2439
2440     d->mb_intra= s->mb_intra;
2441     d->mb_skipped= s->mb_skipped;
2442     d->mv_type= s->mv_type;
2443     d->mv_dir= s->mv_dir;
2444     d->pb= s->pb;
2445     if(s->data_partitioning){
2446         d->pb2= s->pb2;
2447         d->tex_pb= s->tex_pb;
2448     }
2449     d->block= s->block;
2450     for(i=0; i<8; i++)
2451         d->block_last_index[i]= s->block_last_index[i];
2452     d->interlaced_dct= s->interlaced_dct;
2453     d->qscale= s->qscale;
2454
2455     d->esc3_level_length= s->esc3_level_length;
2456 }
2457
2458 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2459                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2460                            int *dmin, int *next_block, int motion_x, int motion_y)
2461 {
2462     int score;
2463     uint8_t *dest_backup[3];
2464
2465     copy_context_before_encode(s, backup, type);
2466
2467     s->block= s->blocks[*next_block];
2468     s->pb= pb[*next_block];
2469     if(s->data_partitioning){
2470         s->pb2   = pb2   [*next_block];
2471         s->tex_pb= tex_pb[*next_block];
2472     }
2473
2474     if(*next_block){
2475         memcpy(dest_backup, s->dest, sizeof(s->dest));
2476         s->dest[0] = s->sc.rd_scratchpad;
2477         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2478         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2479         assert(s->linesize >= 32); //FIXME
2480     }
2481
2482     encode_mb(s, motion_x, motion_y);
2483
2484     score= put_bits_count(&s->pb);
2485     if(s->data_partitioning){
2486         score+= put_bits_count(&s->pb2);
2487         score+= put_bits_count(&s->tex_pb);
2488     }
2489
2490     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2491         ff_mpv_decode_mb(s, s->block);
2492
2493         score *= s->lambda2;
2494         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2495     }
2496
2497     if(*next_block){
2498         memcpy(s->dest, dest_backup, sizeof(s->dest));
2499     }
2500
2501     if(score<*dmin){
2502         *dmin= score;
2503         *next_block^=1;
2504
2505         copy_context_after_encode(best, s, type);
2506     }
2507 }
2508
2509 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2510     uint32_t *sq = ff_square_tab + 256;
2511     int acc=0;
2512     int x,y;
2513
2514     if(w==16 && h==16)
2515         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2516     else if(w==8 && h==8)
2517         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2518
2519     for(y=0; y<h; y++){
2520         for(x=0; x<w; x++){
2521             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2522         }
2523     }
2524
2525     assert(acc>=0);
2526
2527     return acc;
2528 }
2529
2530 static int sse_mb(MpegEncContext *s){
2531     int w= 16;
2532     int h= 16;
2533
2534     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2535     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2536
2537     if(w==16 && h==16)
2538       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2539         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2540                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2541                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2542       }else{
2543         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2544                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2545                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2546       }
2547     else
2548         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2549                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2550                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2551 }
2552
2553 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2554     MpegEncContext *s= *(void**)arg;
2555
2556
2557     s->me.pre_pass=1;
2558     s->me.dia_size= s->avctx->pre_dia_size;
2559     s->first_slice_line=1;
2560     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2561         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2562             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2563         }
2564         s->first_slice_line=0;
2565     }
2566
2567     s->me.pre_pass=0;
2568
2569     return 0;
2570 }
2571
2572 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2573     MpegEncContext *s= *(void**)arg;
2574
2575     s->me.dia_size= s->avctx->dia_size;
2576     s->first_slice_line=1;
2577     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2578         s->mb_x=0; //for block init below
2579         ff_init_block_index(s);
2580         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2581             s->block_index[0]+=2;
2582             s->block_index[1]+=2;
2583             s->block_index[2]+=2;
2584             s->block_index[3]+=2;
2585
2586             /* compute motion vector & mb_type and store in context */
2587             if(s->pict_type==AV_PICTURE_TYPE_B)
2588                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2589             else
2590                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2591         }
2592         s->first_slice_line=0;
2593     }
2594     return 0;
2595 }
2596
2597 static int mb_var_thread(AVCodecContext *c, void *arg){
2598     MpegEncContext *s= *(void**)arg;
2599     int mb_x, mb_y;
2600
2601     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2602         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2603             int xx = mb_x * 16;
2604             int yy = mb_y * 16;
2605             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2606             int varc;
2607             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2608
2609             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2610                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2611
2612             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2613             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2614             s->me.mb_var_sum_temp    += varc;
2615         }
2616     }
2617     return 0;
2618 }
2619
2620 static void write_slice_end(MpegEncContext *s){
2621     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2622         if(s->partitioned_frame){
2623             ff_mpeg4_merge_partitions(s);
2624         }
2625
2626         ff_mpeg4_stuffing(&s->pb);
2627     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2628         ff_mjpeg_encode_stuffing(&s->pb);
2629     }
2630
2631     avpriv_align_put_bits(&s->pb);
2632     flush_put_bits(&s->pb);
2633
2634     if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2635         s->misc_bits+= get_bits_diff(s);
2636 }
2637
2638 static void write_mb_info(MpegEncContext *s)
2639 {
2640     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2641     int offset = put_bits_count(&s->pb);
2642     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2643     int gobn = s->mb_y / s->gob_index;
2644     int pred_x, pred_y;
2645     if (CONFIG_H263_ENCODER)
2646         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2647     bytestream_put_le32(&ptr, offset);
2648     bytestream_put_byte(&ptr, s->qscale);
2649     bytestream_put_byte(&ptr, gobn);
2650     bytestream_put_le16(&ptr, mba);
2651     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2652     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2653     /* 4MV not implemented */
2654     bytestream_put_byte(&ptr, 0); /* hmv2 */
2655     bytestream_put_byte(&ptr, 0); /* vmv2 */
2656 }
2657
2658 static void update_mb_info(MpegEncContext *s, int startcode)
2659 {
2660     if (!s->mb_info)
2661         return;
2662     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2663         s->mb_info_size += 12;
2664         s->prev_mb_info = s->last_mb_info;
2665     }
2666     if (startcode) {
2667         s->prev_mb_info = put_bits_count(&s->pb)/8;
2668         /* This might have incremented mb_info_size above, and we return without
2669          * actually writing any info into that slot yet. But in that case,
2670          * this will be called again at the start of the after writing the
2671          * start code, actually writing the mb info. */
2672         return;
2673     }
2674
2675     s->last_mb_info = put_bits_count(&s->pb)/8;
2676     if (!s->mb_info_size)
2677         s->mb_info_size += 12;
2678     write_mb_info(s);
2679 }
2680
2681 static int encode_thread(AVCodecContext *c, void *arg){
2682     MpegEncContext *s= *(void**)arg;
2683     int mb_x, mb_y, pdif = 0;
2684     int chr_h= 16>>s->chroma_y_shift;
2685     int i, j;
2686     MpegEncContext best_s = { 0 }, backup_s;
2687     uint8_t bit_buf[2][MAX_MB_BYTES];
2688     uint8_t bit_buf2[2][MAX_MB_BYTES];
2689     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2690     PutBitContext pb[2], pb2[2], tex_pb[2];
2691
2692     for(i=0; i<2; i++){
2693         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2694         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2695         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2696     }
2697
2698     s->last_bits= put_bits_count(&s->pb);
2699     s->mv_bits=0;
2700     s->misc_bits=0;
2701     s->i_tex_bits=0;
2702     s->p_tex_bits=0;
2703     s->i_count=0;
2704     s->f_count=0;
2705     s->b_count=0;
2706     s->skip_count=0;
2707
2708     for(i=0; i<3; i++){
2709         /* init last dc values */
2710         /* note: quant matrix value (8) is implied here */
2711         s->last_dc[i] = 128 << s->intra_dc_precision;
2712
2713         s->current_picture.encoding_error[i] = 0;
2714     }
2715     s->mb_skip_run = 0;
2716     memset(s->last_mv, 0, sizeof(s->last_mv));
2717
2718     s->last_mv_dir = 0;
2719
2720     switch(s->codec_id){
2721     case AV_CODEC_ID_H263:
2722     case AV_CODEC_ID_H263P:
2723     case AV_CODEC_ID_FLV1:
2724         if (CONFIG_H263_ENCODER)
2725             s->gob_index = H263_GOB_HEIGHT(s->height);
2726         break;
2727     case AV_CODEC_ID_MPEG4:
2728         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2729             ff_mpeg4_init_partitions(s);
2730         break;
2731     }
2732
2733     s->resync_mb_x=0;
2734     s->resync_mb_y=0;
2735     s->first_slice_line = 1;
2736     s->ptr_lastgob = s->pb.buf;
2737     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2738         s->mb_x=0;
2739         s->mb_y= mb_y;
2740
2741         ff_set_qscale(s, s->qscale);
2742         ff_init_block_index(s);
2743
2744         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2745             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2746             int mb_type= s->mb_type[xy];
2747 //            int d;
2748             int dmin= INT_MAX;
2749             int dir;
2750
2751             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2752                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2753                 return -1;
2754             }
2755             if(s->data_partitioning){
2756                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2757                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2758                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2759                     return -1;
2760                 }
2761             }
2762
2763             s->mb_x = mb_x;
2764             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2765             ff_update_block_index(s);
2766
2767             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2768                 ff_h261_reorder_mb_index(s);
2769                 xy= s->mb_y*s->mb_stride + s->mb_x;
2770                 mb_type= s->mb_type[xy];
2771             }
2772
2773             /* write gob / video packet header  */
2774             if(s->rtp_mode){
2775                 int current_packet_size, is_gob_start;
2776
2777                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2778
2779                 is_gob_start = s->rtp_payload_size &&
2780                                current_packet_size >= s->rtp_payload_size &&
2781                                mb_y + mb_x > 0;
2782
2783                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2784
2785                 switch(s->codec_id){
2786                 case AV_CODEC_ID_H263:
2787                 case AV_CODEC_ID_H263P:
2788                     if(!s->h263_slice_structured)
2789                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2790                     break;
2791                 case AV_CODEC_ID_MPEG2VIDEO:
2792                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2793                 case AV_CODEC_ID_MPEG1VIDEO:
2794                     if(s->mb_skip_run) is_gob_start=0;
2795                     break;
2796                 }
2797
2798                 if(is_gob_start){
2799                     if(s->start_mb_y != mb_y || mb_x!=0){
2800                         write_slice_end(s);
2801
2802                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2803                             ff_mpeg4_init_partitions(s);
2804                         }
2805                     }
2806
2807                     assert((put_bits_count(&s->pb)&7) == 0);
2808                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2809
2810                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2811                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2812                         int d = 100 / s->error_rate;
2813                         if(r % d == 0){
2814                             current_packet_size=0;
2815                             s->pb.buf_ptr= s->ptr_lastgob;
2816                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2817                         }
2818                     }
2819
2820 #if FF_API_RTP_CALLBACK
2821 FF_DISABLE_DEPRECATION_WARNINGS
2822                     if (s->avctx->rtp_callback){
2823                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2824                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2825                     }
2826 FF_ENABLE_DEPRECATION_WARNINGS
2827 #endif
2828                     update_mb_info(s, 1);
2829
2830                     switch(s->codec_id){
2831                     case AV_CODEC_ID_MPEG4:
2832                         if (CONFIG_MPEG4_ENCODER) {
2833                             ff_mpeg4_encode_video_packet_header(s);
2834                             ff_mpeg4_clean_buffers(s);
2835                         }
2836                     break;
2837                     case AV_CODEC_ID_MPEG1VIDEO:
2838                     case AV_CODEC_ID_MPEG2VIDEO:
2839                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2840                             ff_mpeg1_encode_slice_header(s);
2841                             ff_mpeg1_clean_buffers(s);
2842                         }
2843                     break;
2844                     case AV_CODEC_ID_H263:
2845                     case AV_CODEC_ID_H263P:
2846                         if (CONFIG_H263_ENCODER)
2847                             ff_h263_encode_gob_header(s, mb_y);
2848                     break;
2849                     }
2850
2851                     if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
2852                         int bits= put_bits_count(&s->pb);
2853                         s->misc_bits+= bits - s->last_bits;
2854                         s->last_bits= bits;
2855                     }
2856
2857                     s->ptr_lastgob += current_packet_size;
2858                     s->first_slice_line=1;
2859                     s->resync_mb_x=mb_x;
2860                     s->resync_mb_y=mb_y;
2861                 }
2862             }
2863
2864             if(  (s->resync_mb_x   == s->mb_x)
2865                && s->resync_mb_y+1 == s->mb_y){
2866                 s->first_slice_line=0;
2867             }
2868
2869             s->mb_skipped=0;
2870             s->dquant=0; //only for QP_RD
2871
2872             update_mb_info(s, 0);
2873
2874             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2875                 int next_block=0;
2876                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2877
2878                 copy_context_before_encode(&backup_s, s, -1);
2879                 backup_s.pb= s->pb;
2880                 best_s.data_partitioning= s->data_partitioning;
2881                 best_s.partitioned_frame= s->partitioned_frame;
2882                 if(s->data_partitioning){
2883                     backup_s.pb2= s->pb2;
2884                     backup_s.tex_pb= s->tex_pb;
2885                 }
2886
2887                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2888                     s->mv_dir = MV_DIR_FORWARD;
2889                     s->mv_type = MV_TYPE_16X16;
2890                     s->mb_intra= 0;
2891                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2892                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2893                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2894                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2895                 }
2896                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2897                     s->mv_dir = MV_DIR_FORWARD;
2898                     s->mv_type = MV_TYPE_FIELD;
2899                     s->mb_intra= 0;
2900                     for(i=0; i<2; i++){
2901                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2902                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2903                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2904                     }
2905                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2906                                  &dmin, &next_block, 0, 0);
2907                 }
2908                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2909                     s->mv_dir = MV_DIR_FORWARD;
2910                     s->mv_type = MV_TYPE_16X16;
2911                     s->mb_intra= 0;
2912                     s->mv[0][0][0] = 0;
2913                     s->mv[0][0][1] = 0;
2914                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2915                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2916                 }
2917                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2918                     s->mv_dir = MV_DIR_FORWARD;
2919                     s->mv_type = MV_TYPE_8X8;
2920                     s->mb_intra= 0;
2921                     for(i=0; i<4; i++){
2922                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2923                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2924                     }
2925                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2926                                  &dmin, &next_block, 0, 0);
2927                 }
2928                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2929                     s->mv_dir = MV_DIR_FORWARD;
2930                     s->mv_type = MV_TYPE_16X16;
2931                     s->mb_intra= 0;
2932                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2933                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2934                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2935                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2936                 }
2937                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2938                     s->mv_dir = MV_DIR_BACKWARD;
2939                     s->mv_type = MV_TYPE_16X16;
2940                     s->mb_intra= 0;
2941                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2942                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2943                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2944                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2945                 }
2946                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2947                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2948                     s->mv_type = MV_TYPE_16X16;
2949                     s->mb_intra= 0;
2950                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2951                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2952                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2953                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2954                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2955                                  &dmin, &next_block, 0, 0);
2956                 }
2957                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2958                     s->mv_dir = MV_DIR_FORWARD;
2959                     s->mv_type = MV_TYPE_FIELD;
2960                     s->mb_intra= 0;
2961                     for(i=0; i<2; i++){
2962                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2963                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2964                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2965                     }
2966                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2967                                  &dmin, &next_block, 0, 0);
2968                 }
2969                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2970                     s->mv_dir = MV_DIR_BACKWARD;
2971                     s->mv_type = MV_TYPE_FIELD;
2972                     s->mb_intra= 0;
2973                     for(i=0; i<2; i++){
2974                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2975                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2976                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2977                     }
2978                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2979                                  &dmin, &next_block, 0, 0);
2980                 }
2981                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2982                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2983                     s->mv_type = MV_TYPE_FIELD;
2984                     s->mb_intra= 0;
2985                     for(dir=0; dir<2; dir++){
2986                         for(i=0; i<2; i++){
2987                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2988                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2989                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2990                         }
2991                     }
2992                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2993                                  &dmin, &next_block, 0, 0);
2994                 }
2995                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2996                     s->mv_dir = 0;
2997                     s->mv_type = MV_TYPE_16X16;
2998                     s->mb_intra= 1;
2999                     s->mv[0][0][0] = 0;
3000                     s->mv[0][0][1] = 0;
3001                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3002                                  &dmin, &next_block, 0, 0);
3003                     if(s->h263_pred || s->h263_aic){
3004                         if(best_s.mb_intra)
3005                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3006                         else
3007                             ff_clean_intra_table_entries(s); //old mode?
3008                     }
3009                 }
3010
3011                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3012                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3013                         const int last_qp= backup_s.qscale;
3014                         int qpi, qp, dc[6];
3015                         int16_t ac[6][16];
3016                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3017                         static const int dquant_tab[4]={-1,1,-2,2};
3018
3019                         assert(backup_s.dquant == 0);
3020
3021                         //FIXME intra
3022                         s->mv_dir= best_s.mv_dir;
3023                         s->mv_type = MV_TYPE_16X16;
3024                         s->mb_intra= best_s.mb_intra;
3025                         s->mv[0][0][0] = best_s.mv[0][0][0];
3026                         s->mv[0][0][1] = best_s.mv[0][0][1];
3027                         s->mv[1][0][0] = best_s.mv[1][0][0];
3028                         s->mv[1][0][1] = best_s.mv[1][0][1];
3029
3030                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3031                         for(; qpi<4; qpi++){
3032                             int dquant= dquant_tab[qpi];
3033                             qp= last_qp + dquant;
3034                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3035                                 continue;
3036                             backup_s.dquant= dquant;
3037                             if(s->mb_intra && s->dc_val[0]){
3038                                 for(i=0; i<6; i++){
3039                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3040                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3041                                 }
3042                             }
3043
3044                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3045                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3046                             if(best_s.qscale != qp){
3047                                 if(s->mb_intra && s->dc_val[0]){
3048                                     for(i=0; i<6; i++){
3049                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3050                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3051                                     }
3052                                 }
3053                             }
3054                         }
3055                     }
3056                 }
3057                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3058                     int mx= s->b_direct_mv_table[xy][0];
3059                     int my= s->b_direct_mv_table[xy][1];
3060
3061                     backup_s.dquant = 0;
3062                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3063                     s->mb_intra= 0;
3064                     ff_mpeg4_set_direct_mv(s, mx, my);
3065                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3066                                  &dmin, &next_block, mx, my);
3067                 }
3068                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3069                     backup_s.dquant = 0;
3070                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3071                     s->mb_intra= 0;
3072                     ff_mpeg4_set_direct_mv(s, 0, 0);
3073                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3074                                  &dmin, &next_block, 0, 0);
3075                 }
3076                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3077                     int coded=0;
3078                     for(i=0; i<6; i++)
3079                         coded |= s->block_last_index[i];
3080                     if(coded){
3081                         int mx,my;
3082                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3083                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3084                             mx=my=0; //FIXME find the one we actually used
3085                             ff_mpeg4_set_direct_mv(s, mx, my);
3086                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3087                             mx= s->mv[1][0][0];
3088                             my= s->mv[1][0][1];
3089                         }else{
3090                             mx= s->mv[0][0][0];
3091                             my= s->mv[0][0][1];
3092                         }
3093
3094                         s->mv_dir= best_s.mv_dir;
3095                         s->mv_type = best_s.mv_type;
3096                         s->mb_intra= 0;
3097 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3098                         s->mv[0][0][1] = best_s.mv[0][0][1];
3099                         s->mv[1][0][0] = best_s.mv[1][0][0];
3100                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3101                         backup_s.dquant= 0;
3102                         s->skipdct=1;
3103                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3104                                         &dmin, &next_block, mx, my);
3105                         s->skipdct=0;
3106                     }
3107                 }
3108
3109                 s->current_picture.qscale_table[xy] = best_s.qscale;
3110
3111                 copy_context_after_encode(s, &best_s, -1);
3112
3113                 pb_bits_count= put_bits_count(&s->pb);
3114                 flush_put_bits(&s->pb);
3115                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3116                 s->pb= backup_s.pb;
3117
3118                 if(s->data_partitioning){
3119                     pb2_bits_count= put_bits_count(&s->pb2);
3120                     flush_put_bits(&s->pb2);
3121                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3122                     s->pb2= backup_s.pb2;
3123
3124                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3125                     flush_put_bits(&s->tex_pb);
3126                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3127                     s->tex_pb= backup_s.tex_pb;
3128                 }
3129                 s->last_bits= put_bits_count(&s->pb);
3130
3131                 if (CONFIG_H263_ENCODER &&
3132                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3133                     ff_h263_update_motion_val(s);
3134
3135                 if(next_block==0){ //FIXME 16 vs linesize16
3136                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3137                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3138                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3139                 }
3140
3141                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3142                     ff_mpv_decode_mb(s, s->block);
3143             } else {
3144                 int motion_x = 0, motion_y = 0;
3145                 s->mv_type=MV_TYPE_16X16;
3146                 // only one MB-Type possible
3147
3148                 switch(mb_type){
3149                 case CANDIDATE_MB_TYPE_INTRA:
3150                     s->mv_dir = 0;
3151                     s->mb_intra= 1;
3152                     motion_x= s->mv[0][0][0] = 0;
3153                     motion_y= s->mv[0][0][1] = 0;
3154                     break;
3155                 case CANDIDATE_MB_TYPE_INTER:
3156                     s->mv_dir = MV_DIR_FORWARD;
3157                     s->mb_intra= 0;
3158                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3159                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3160                     break;
3161                 case CANDIDATE_MB_TYPE_INTER_I:
3162                     s->mv_dir = MV_DIR_FORWARD;
3163                     s->mv_type = MV_TYPE_FIELD;
3164                     s->mb_intra= 0;
3165                     for(i=0; i<2; i++){
3166                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3167                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3168                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3169                     }
3170                     break;
3171                 case CANDIDATE_MB_TYPE_INTER4V:
3172                     s->mv_dir = MV_DIR_FORWARD;
3173                     s->mv_type = MV_TYPE_8X8;
3174                     s->mb_intra= 0;
3175                     for(i=0; i<4; i++){
3176                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3177                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3178                     }
3179                     break;
3180                 case CANDIDATE_MB_TYPE_DIRECT:
3181                     if (CONFIG_MPEG4_ENCODER) {
3182                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3183                         s->mb_intra= 0;
3184                         motion_x=s->b_direct_mv_table[xy][0];
3185                         motion_y=s->b_direct_mv_table[xy][1];
3186                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3187                     }
3188                     break;
3189                 case CANDIDATE_MB_TYPE_DIRECT0:
3190                     if (CONFIG_MPEG4_ENCODER) {
3191                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3192                         s->mb_intra= 0;
3193                         ff_mpeg4_set_direct_mv(s, 0, 0);
3194                     }
3195                     break;
3196                 case CANDIDATE_MB_TYPE_BIDIR:
3197                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3198                     s->mb_intra= 0;
3199                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3200                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3201                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3202                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3203                     break;
3204                 case CANDIDATE_MB_TYPE_BACKWARD:
3205                     s->mv_dir = MV_DIR_BACKWARD;
3206                     s->mb_intra= 0;
3207                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3208                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3209                     break;
3210                 case CANDIDATE_MB_TYPE_FORWARD:
3211                     s->mv_dir = MV_DIR_FORWARD;
3212                     s->mb_intra= 0;
3213                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3214                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3215                     break;
3216                 case CANDIDATE_MB_TYPE_FORWARD_I:
3217                     s->mv_dir = MV_DIR_FORWARD;
3218                     s->mv_type = MV_TYPE_FIELD;
3219                     s->mb_intra= 0;
3220                     for(i=0; i<2; i++){
3221                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3222                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3223                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3224                     }
3225                     break;
3226                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3227                     s->mv_dir = MV_DIR_BACKWARD;
3228                     s->mv_type = MV_TYPE_FIELD;
3229                     s->mb_intra= 0;
3230                     for(i=0; i<2; i++){
3231                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3232                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3233                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3234                     }
3235                     break;
3236                 case CANDIDATE_MB_TYPE_BIDIR_I:
3237                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3238                     s->mv_type = MV_TYPE_FIELD;
3239                     s->mb_intra= 0;
3240                     for(dir=0; dir<2; dir++){
3241                         for(i=0; i<2; i++){
3242                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3243                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3244                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3245                         }
3246                     }
3247                     break;
3248                 default:
3249                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3250                 }
3251
3252                 encode_mb(s, motion_x, motion_y);
3253
3254                 // RAL: Update last macroblock type
3255                 s->last_mv_dir = s->mv_dir;
3256
3257                 if (CONFIG_H263_ENCODER &&
3258                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3259                     ff_h263_update_motion_val(s);
3260
3261                 ff_mpv_decode_mb(s, s->block);
3262             }
3263
3264             /* clean the MV table in IPS frames for direct mode in B frames */
3265             if(s->mb_intra /* && I,P,S_TYPE */){
3266                 s->p_mv_table[xy][0]=0;
3267                 s->p_mv_table[xy][1]=0;
3268             }
3269
3270             if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3271                 int w= 16;
3272                 int h= 16;
3273
3274                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3275                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3276
3277                 s->current_picture.encoding_error[0] += sse(
3278                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3279                     s->dest[0], w, h, s->linesize);
3280                 s->current_picture.encoding_error[1] += sse(
3281                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3282                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3283                 s->current_picture.encoding_error[2] += sse(
3284                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3285                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3286             }
3287             if(s->loop_filter){
3288                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3289                     ff_h263_loop_filter(s);
3290             }
3291             ff_dlog(s->avctx, "MB %d %d bits\n",
3292                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3293         }
3294     }
3295
3296     //not beautiful here but we must write it before flushing so it has to be here
3297     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3298         ff_msmpeg4_encode_ext_header(s);
3299
3300     write_slice_end(s);
3301
3302 #if FF_API_RTP_CALLBACK
3303 FF_DISABLE_DEPRECATION_WARNINGS
3304     /* Send the last GOB if RTP */
3305     if (s->avctx->rtp_callback) {
3306         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3307         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3308         /* Call the RTP callback to send the last GOB */
3309         emms_c();
3310         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3311     }
3312 FF_ENABLE_DEPRECATION_WARNINGS
3313 #endif
3314
3315     return 0;
3316 }
3317
/**
 * Add src->field to dst->field, then reset src->field to zero.
 *
 * Relies on pointers named `dst` and `src` being in scope at the point of use.
 * The body is wrapped in do { } while (0) so the macro always expands to
 * exactly one statement, even under an unbraced if/for.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3319 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3320     MERGE(me.scene_change_score);
3321     MERGE(me.mc_mb_var_sum_temp);
3322     MERGE(me.mb_var_sum_temp);
3323 }
3324
3325 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3326     int i;
3327
3328     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3329     MERGE(dct_count[1]);
3330     MERGE(mv_bits);
3331     MERGE(i_tex_bits);
3332     MERGE(p_tex_bits);
3333     MERGE(i_count);
3334     MERGE(f_count);
3335     MERGE(b_count);
3336     MERGE(skip_count);
3337     MERGE(misc_bits);
3338     MERGE(er.error_count);
3339     MERGE(padding_bug_score);
3340     MERGE(current_picture.encoding_error[0]);
3341     MERGE(current_picture.encoding_error[1]);
3342     MERGE(current_picture.encoding_error[2]);
3343
3344     if (dst->noise_reduction){
3345         for(i=0; i<64; i++){
3346             MERGE(dct_error_sum[0][i]);
3347             MERGE(dct_error_sum[1][i]);
3348         }
3349     }
3350
3351     assert(put_bits_count(&src->pb) % 8 ==0);
3352     assert(put_bits_count(&dst->pb) % 8 ==0);
3353     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3354     flush_put_bits(&dst->pb);
3355 }
3356
3357 static int estimate_qp(MpegEncContext *s, int dry_run){
3358     if (s->next_lambda){
3359         s->current_picture_ptr->f->quality =
3360         s->current_picture.f->quality = s->next_lambda;
3361         if(!dry_run) s->next_lambda= 0;
3362     } else if (!s->fixed_qscale) {
3363         s->current_picture_ptr->f->quality =
3364         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3365         if (s->current_picture.f->quality < 0)
3366             return -1;
3367     }
3368
3369     if(s->adaptive_quant){
3370         switch(s->codec_id){
3371         case AV_CODEC_ID_MPEG4:
3372             if (CONFIG_MPEG4_ENCODER)
3373                 ff_clean_mpeg4_qscales(s);
3374             break;
3375         case AV_CODEC_ID_H263:
3376         case AV_CODEC_ID_H263P:
3377         case AV_CODEC_ID_FLV1:
3378             if (CONFIG_H263_ENCODER)
3379                 ff_clean_h263_qscales(s);
3380             break;
3381         default:
3382             ff_init_qscale_tab(s);
3383         }
3384
3385         s->lambda= s->lambda_table[0];
3386         //FIXME broken
3387     }else
3388         s->lambda = s->current_picture.f->quality;
3389     update_qscale(s);
3390     return 0;
3391 }
3392
3393 /* must be called before writing the header */
3394 static void set_frame_distances(MpegEncContext * s){
3395     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3396     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3397
3398     if(s->pict_type==AV_PICTURE_TYPE_B){
3399         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3400         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3401     }else{
3402         s->pp_time= s->time - s->last_non_b_time;
3403         s->last_non_b_time= s->time;
3404         assert(s->picture_number==0 || s->pp_time > 0);
3405     }
3406 }
3407
3408 static int encode_picture(MpegEncContext *s, int picture_number)
3409 {
3410     int i, ret;
3411     int bits;
3412     int context_count = s->slice_context_count;
3413
3414     s->picture_number = picture_number;
3415
3416     /* Reset the average MB variance */
3417     s->me.mb_var_sum_temp    =
3418     s->me.mc_mb_var_sum_temp = 0;
3419
3420     /* we need to initialize some time vars before we can encode b-frames */
3421     // RAL: Condition added for MPEG1VIDEO
3422     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3423         set_frame_distances(s);
3424     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3425         ff_set_mpeg4_time(s);
3426
3427     s->me.scene_change_score=0;
3428
3429 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3430
3431     if(s->pict_type==AV_PICTURE_TYPE_I){
3432         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3433         else                        s->no_rounding=0;
3434     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3435         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3436             s->no_rounding ^= 1;
3437     }
3438
3439     if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3440         if (estimate_qp(s,1) < 0)
3441             return -1;
3442         ff_get_2pass_fcode(s);
3443     } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3444         if(s->pict_type==AV_PICTURE_TYPE_B)
3445             s->lambda= s->last_lambda_for[s->pict_type];
3446         else
3447             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3448         update_qscale(s);
3449     }
3450
3451     s->mb_intra=0; //for the rate distortion & bit compare functions
3452     for(i=1; i<context_count; i++){
3453         ret = ff_update_duplicate_context(s->thread_context[i], s);
3454         if (ret < 0)
3455             return ret;
3456     }
3457
3458     if(ff_init_me(s)<0)
3459         return -1;
3460
3461     /* Estimate motion for every MB */
3462     if(s->pict_type != AV_PICTURE_TYPE_I){
3463         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3464         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3465         if (s->pict_type != AV_PICTURE_TYPE_B) {
3466             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3467                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3468             }
3469         }
3470
3471         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3472     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3473         /* I-Frame */
3474         for(i=0; i<s->mb_stride*s->mb_height; i++)
3475             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3476
3477         if(!s->fixed_qscale){
3478             /* finding spatial complexity for I-frame rate control */
3479             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3480         }
3481     }
3482     for(i=1; i<context_count; i++){
3483         merge_context_after_me(s, s->thread_context[i]);
3484     }
3485     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3486     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3487     emms_c();
3488
3489     if (s->me.scene_change_score > s->scenechange_threshold &&
3490         s->pict_type == AV_PICTURE_TYPE_P) {
3491         s->pict_type= AV_PICTURE_TYPE_I;
3492         for(i=0; i<s->mb_stride*s->mb_height; i++)
3493             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3494         ff_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3495                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3496     }
3497
3498     if(!s->umvplus){
3499         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3500             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3501
3502             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3503                 int a,b;
3504                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3505                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3506                 s->f_code= FFMAX3(s->f_code, a, b);
3507             }
3508
3509             ff_fix_long_p_mvs(s);
3510             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3511             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3512                 int j;
3513                 for(i=0; i<2; i++){
3514                     for(j=0; j<2; j++)
3515                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3516                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3517                 }
3518             }
3519         }
3520
3521         if(s->pict_type==AV_PICTURE_TYPE_B){
3522             int a, b;
3523
3524             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3525             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3526             s->f_code = FFMAX(a, b);
3527
3528             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3529             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3530             s->b_code = FFMAX(a, b);
3531
3532             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3533             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3534             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3535             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3536             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3537                 int dir, j;
3538                 for(dir=0; dir<2; dir++){
3539                     for(i=0; i<2; i++){
3540                         for(j=0; j<2; j++){
3541                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3542                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3543                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3544                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3545                         }
3546                     }
3547                 }
3548             }
3549         }
3550     }
3551
3552     if (estimate_qp(s, 0) < 0)
3553         return -1;
3554
3555     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3556         s->pict_type == AV_PICTURE_TYPE_I &&
3557         !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3558         s->qscale= 3; //reduce clipping problems
3559
3560     if (s->out_format == FMT_MJPEG) {
3561         /* for mjpeg, we do include qscale in the matrix */
3562         for(i=1;i<64;i++){
3563             int j = s->idsp.idct_permutation[i];
3564
3565             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3566         }
3567         s->y_dc_scale_table=
3568         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3569         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3570         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3571                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3572         s->qscale= 8;
3573     }
3574
3575     //FIXME var duplication
3576     s->current_picture_ptr->f->key_frame =
3577     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3578     s->current_picture_ptr->f->pict_type =
3579     s->current_picture.f->pict_type = s->pict_type;
3580
3581     if (s->current_picture.f->key_frame)
3582         s->picture_in_gop_number=0;
3583
3584     s->last_bits= put_bits_count(&s->pb);
3585     switch(s->out_format) {
3586     case FMT_MJPEG:
3587         if (CONFIG_MJPEG_ENCODER)
3588             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3589                                            s->pred, s->intra_matrix);
3590         break;
3591     case FMT_H261:
3592         if (CONFIG_H261_ENCODER)
3593             ff_h261_encode_picture_header(s, picture_number);
3594         break;
3595     case FMT_H263:
3596         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3597             ff_wmv2_encode_picture_header(s, picture_number);
3598         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3599             ff_msmpeg4_encode_picture_header(s, picture_number);
3600         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3601             ff_mpeg4_encode_picture_header(s, picture_number);
3602         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3603             ret = ff_rv10_encode_picture_header(s, picture_number);
3604             if (ret < 0)
3605                 return ret;
3606         }
3607         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3608             ff_rv20_encode_picture_header(s, picture_number);
3609         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3610             ff_flv_encode_picture_header(s, picture_number);
3611         else if (CONFIG_H263_ENCODER)
3612             ff_h263_encode_picture_header(s, picture_number);
3613         break;
3614     case FMT_MPEG1:
3615         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3616             ff_mpeg1_encode_picture_header(s, picture_number);
3617         break;
3618     default:
3619         assert(0);
3620     }
3621     bits= put_bits_count(&s->pb);
3622     s->header_bits= bits - s->last_bits;
3623
3624     for(i=1; i<context_count; i++){
3625         update_duplicate_context_after_me(s->thread_context[i], s);
3626     }
3627     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3628     for(i=1; i<context_count; i++){
3629         merge_context_after_encode(s, s->thread_context[i]);
3630     }
3631     emms_c();
3632     return 0;
3633 }
3634
3635 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3636     const int intra= s->mb_intra;
3637     int i;
3638
3639     s->dct_count[intra]++;
3640
3641     for(i=0; i<64; i++){
3642         int level= block[i];
3643
3644         if(level){
3645             if(level>0){
3646                 s->dct_error_sum[intra][i] += level;
3647                 level -= s->dct_offset[intra][i];
3648                 if(level<0) level=0;
3649             }else{
3650                 s->dct_error_sum[intra][i] -= level;
3651                 level += s->dct_offset[intra][i];
3652                 if(level>0) level=0;
3653             }
3654             block[i]= level;
3655         }
3656     }
3657 }
3658
3659 static int dct_quantize_trellis_c(MpegEncContext *s,
3660                                   int16_t *block, int n,
3661                                   int qscale, int *overflow){
3662     const int *qmat;
3663     const uint8_t *scantable= s->intra_scantable.scantable;
3664     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3665     int max=0;
3666     unsigned int threshold1, threshold2;
3667     int bias=0;
3668     int run_tab[65];
3669     int level_tab[65];
3670     int score_tab[65];
3671     int survivor[65];
3672     int survivor_count;
3673     int last_run=0;
3674     int last_level=0;
3675     int last_score= 0;
3676     int last_i;
3677     int coeff[2][64];
3678     int coeff_count[64];
3679     int qmul, qadd, start_i, last_non_zero, i, dc;
3680     const int esc_length= s->ac_esc_length;
3681     uint8_t * length;
3682     uint8_t * last_length;
3683     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3684
3685     s->fdsp.fdct(block);
3686
3687     if(s->dct_error_sum)
3688         s->denoise_dct(s, block);
3689     qmul= qscale*16;
3690     qadd= ((qscale-1)|1)*8;
3691
3692     if (s->mb_intra) {
3693         int q;
3694         if (!s->h263_aic) {
3695             if (n < 4)
3696                 q = s->y_dc_scale;
3697             else
3698                 q = s->c_dc_scale;
3699             q = q << 3;
3700         } else{
3701             /* For AIC we skip quant/dequant of INTRADC */
3702             q = 1 << 3;
3703             qadd=0;
3704         }
3705
3706         /* note: block[0] is assumed to be positive */
3707         block[0] = (block[0] + (q >> 1)) / q;
3708         start_i = 1;
3709         last_non_zero = 0;
3710         qmat = s->q_intra_matrix[qscale];
3711         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3712             bias= 1<<(QMAT_SHIFT-1);
3713         length     = s->intra_ac_vlc_length;
3714         last_length= s->intra_ac_vlc_last_length;
3715     } else {
3716         start_i = 0;
3717         last_non_zero = -1;
3718         qmat = s->q_inter_matrix[qscale];
3719         length     = s->inter_ac_vlc_length;
3720         last_length= s->inter_ac_vlc_last_length;
3721     }
3722     last_i= start_i;
3723
3724     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3725     threshold2= (threshold1<<1);
3726
3727     for(i=63; i>=start_i; i--) {
3728         const int j = scantable[i];
3729         int level = block[j] * qmat[j];
3730
3731         if(((unsigned)(level+threshold1))>threshold2){
3732             last_non_zero = i;
3733             break;
3734         }
3735     }
3736
3737     for(i=start_i; i<=last_non_zero; i++) {
3738         const int j = scantable[i];
3739         int level = block[j] * qmat[j];
3740
3741 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3742 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3743         if(((unsigned)(level+threshold1))>threshold2){
3744             if(level>0){
3745                 level= (bias + level)>>QMAT_SHIFT;
3746                 coeff[0][i]= level;
3747                 coeff[1][i]= level-1;
3748 //                coeff[2][k]= level-2;
3749             }else{
3750                 level= (bias - level)>>QMAT_SHIFT;
3751                 coeff[0][i]= -level;
3752                 coeff[1][i]= -level+1;
3753 //                coeff[2][k]= -level+2;
3754             }
3755             coeff_count[i]= FFMIN(level, 2);
3756             assert(coeff_count[i]);
3757             max |=level;
3758         }else{
3759             coeff[0][i]= (level>>31)|1;
3760             coeff_count[i]= 1;
3761         }
3762     }
3763
3764     *overflow= s->max_qcoeff < max; //overflow might have happened
3765
3766     if(last_non_zero < start_i){
3767         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3768         return last_non_zero;
3769     }
3770
3771     score_tab[start_i]= 0;
3772     survivor[0]= start_i;
3773     survivor_count= 1;
3774
3775     for(i=start_i; i<=last_non_zero; i++){
3776         int level_index, j, zero_distortion;
3777         int dct_coeff= FFABS(block[ scantable[i] ]);
3778         int best_score=256*256*256*120;
3779
3780         if (s->fdsp.fdct == ff_fdct_ifast)
3781             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3782         zero_distortion= dct_coeff*dct_coeff;
3783
3784         for(level_index=0; level_index < coeff_count[i]; level_index++){
3785             int distortion;
3786             int level= coeff[level_index][i];
3787             const int alevel= FFABS(level);
3788             int unquant_coeff;
3789
3790             assert(level);
3791
3792             if(s->out_format == FMT_H263){
3793                 unquant_coeff= alevel*qmul + qadd;
3794             }else{ //MPEG1
3795                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3796                 if(s->mb_intra){
3797                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3798                         unquant_coeff =   (unquant_coeff - 1) | 1;
3799                 }else{
3800                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3801                         unquant_coeff =   (unquant_coeff - 1) | 1;
3802                 }
3803                 unquant_coeff<<= 3;
3804             }
3805
3806             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3807             level+=64;
3808             if((level&(~127)) == 0){
3809                 for(j=survivor_count-1; j>=0; j--){
3810                     int run= i - survivor[j];
3811                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3812                     score += score_tab[i-run];
3813
3814                     if(score < best_score){
3815                         best_score= score;
3816                         run_tab[i+1]= run;
3817                         level_tab[i+1]= level-64;
3818                     }
3819                 }
3820
3821                 if(s->out_format == FMT_H263){
3822                     for(j=survivor_count-1; j>=0; j--){
3823                         int run= i - survivor[j];
3824                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3825                         score += score_tab[i-run];
3826                         if(score < last_score){
3827                             last_score= score;
3828                             last_run= run;
3829                             last_level= level-64;
3830                             last_i= i+1;
3831                         }
3832                     }
3833                 }
3834             }else{
3835                 distortion += esc_length*lambda;
3836                 for(j=survivor_count-1; j>=0; j--){
3837                     int run= i - survivor[j];
3838                     int score= distortion + score_tab[i-run];
3839
3840                     if(score < best_score){
3841                         best_score= score;
3842                         run_tab[i+1]= run;
3843                         level_tab[i+1]= level-64;
3844                     }
3845                 }
3846
3847                 if(s->out_format == FMT_H263){
3848                   for(j=survivor_count-1; j>=0; j--){
3849                         int run= i - survivor[j];
3850                         int score= distortion + score_tab[i-run];
3851                         if(score < last_score){
3852                             last_score= score;
3853                             last_run= run;
3854                             last_level= level-64;
3855                             last_i= i+1;
3856                         }
3857                     }
3858                 }
3859             }
3860         }
3861
3862         score_tab[i+1]= best_score;
3863
3864         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3865         if(last_non_zero <= 27){
3866             for(; survivor_count; survivor_count--){
3867                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3868                     break;
3869             }
3870         }else{
3871             for(; survivor_count; survivor_count--){
3872                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3873                     break;
3874             }
3875         }
3876
3877         survivor[ survivor_count++ ]= i+1;
3878     }
3879
3880     if(s->out_format != FMT_H263){
3881         last_score= 256*256*256*120;
3882         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3883             int score= score_tab[i];
3884             if(i) score += lambda*2; //FIXME exacter?
3885
3886             if(score < last_score){
3887                 last_score= score;
3888                 last_i= i;
3889                 last_level= level_tab[i];
3890                 last_run= run_tab[i];
3891             }
3892         }
3893     }
3894
3895     s->coded_score[n] = last_score;
3896
3897     dc= FFABS(block[0]);
3898     last_non_zero= last_i - 1;
3899     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3900
3901     if(last_non_zero < start_i)
3902         return last_non_zero;
3903
3904     if(last_non_zero == 0 && start_i == 0){
3905         int best_level= 0;
3906         int best_score= dc * dc;
3907
3908         for(i=0; i<coeff_count[0]; i++){
3909             int level= coeff[i][0];
3910             int alevel= FFABS(level);
3911             int unquant_coeff, score, distortion;
3912
3913             if(s->out_format == FMT_H263){
3914                     unquant_coeff= (alevel*qmul + qadd)>>3;
3915             }else{ //MPEG1
3916                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3917                     unquant_coeff =   (unquant_coeff - 1) | 1;
3918             }
3919             unquant_coeff = (unquant_coeff + 4) >> 3;
3920             unquant_coeff<<= 3 + 3;
3921
3922             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3923             level+=64;
3924             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3925             else                    score= distortion + esc_length*lambda;
3926
3927             if(score < best_score){
3928                 best_score= score;
3929                 best_level= level - 64;
3930             }
3931         }
3932         block[0]= best_level;
3933         s->coded_score[n] = best_score - dc*dc;
3934         if(best_level == 0) return -1;
3935         else                return last_non_zero;
3936     }
3937
3938     i= last_i;
3939     assert(last_level);
3940
3941     block[ perm_scantable[last_non_zero] ]= last_level;
3942     i -= last_run + 1;
3943
3944     for(; i>start_i; i -= run_tab[i] + 1){
3945         block[ perm_scantable[i-1] ]= level_tab[i];
3946     }
3947
3948     return last_non_zero;
3949 }
3950
3951 //#define REFINE_STATS 1
3952 static int16_t basis[64][64];
3953
3954 static void build_basis(uint8_t *perm){
3955     int i, j, x, y;
3956     emms_c();
3957     for(i=0; i<8; i++){
3958         for(j=0; j<8; j++){
3959             for(y=0; y<8; y++){
3960                 for(x=0; x<8; x++){
3961                     double s= 0.25*(1<<BASIS_SHIFT);
3962                     int index= 8*i + j;
3963                     int perm_index= perm[index];
3964                     if(i==0) s*= sqrt(0.5);
3965                     if(j==0) s*= sqrt(0.5);
3966                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3967                 }
3968             }
3969         }
3970     }
3971 }
3972
3973 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3974                         int16_t *block, int16_t *weight, int16_t *orig,
3975                         int n, int qscale){
3976     int16_t rem[64];
3977     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3978     const uint8_t *scantable= s->intra_scantable.scantable;
3979     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3980 //    unsigned int threshold1, threshold2;
3981 //    int bias=0;
3982     int run_tab[65];
3983     int prev_run=0;
3984     int prev_level=0;
3985     int qmul, qadd, start_i, last_non_zero, i, dc;
3986     uint8_t * length;
3987     uint8_t * last_length;
3988     int lambda;
3989     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3990 #ifdef REFINE_STATS
3991 static int count=0;
3992 static int after_last=0;
3993 static int to_zero=0;
3994 static int from_zero=0;
3995 static int raise=0;
3996 static int lower=0;
3997 static int messed_sign=0;
3998 #endif
3999
4000     if(basis[0][0] == 0)
4001         build_basis(s->idsp.idct_permutation);
4002
4003     qmul= qscale*2;
4004     qadd= (qscale-1)|1;
4005     if (s->mb_intra) {
4006         if (!s->h263_aic) {
4007             if (n < 4)
4008                 q = s->y_dc_scale;
4009             else
4010                 q = s->c_dc_scale;
4011         } else{
4012             /* For AIC we skip quant/dequant of INTRADC */
4013             q = 1;
4014             qadd=0;
4015         }
4016         q <<= RECON_SHIFT-3;
4017         /* note: block[0] is assumed to be positive */
4018         dc= block[0]*q;
4019 //        block[0] = (block[0] + (q >> 1)) / q;
4020         start_i = 1;
4021 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4022 //            bias= 1<<(QMAT_SHIFT-1);
4023         length     = s->intra_ac_vlc_length;
4024         last_length= s->intra_ac_vlc_last_length;
4025     } else {
4026         dc= 0;
4027         start_i = 0;
4028         length     = s->inter_ac_vlc_length;
4029         last_length= s->inter_ac_vlc_last_length;
4030     }
4031     last_non_zero = s->block_last_index[n];
4032
4033 #ifdef REFINE_STATS
4034 {START_TIMER
4035 #endif
4036     dc += (1<<(RECON_SHIFT-1));
4037     for(i=0; i<64; i++){
4038         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4039     }
4040 #ifdef REFINE_STATS
4041 STOP_TIMER("memset rem[]")}
4042 #endif
4043     sum=0;
4044     for(i=0; i<64; i++){
4045         int one= 36;
4046         int qns=4;
4047         int w;
4048
4049         w= FFABS(weight[i]) + qns*one;
4050         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4051
4052         weight[i] = w;
4053 //        w=weight[i] = (63*qns + (w/2)) / w;
4054
4055         assert(w>0);
4056         assert(w<(1<<6));
4057         sum += w*w;
4058     }
4059     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4060 #ifdef REFINE_STATS
4061 {START_TIMER
4062 #endif
4063     run=0;
4064     rle_index=0;
4065     for(i=start_i; i<=last_non_zero; i++){
4066         int j= perm_scantable[i];
4067         const int level= block[j];
4068         int coeff;
4069
4070         if(level){
4071             if(level<0) coeff= qmul*level - qadd;
4072             else        coeff= qmul*level + qadd;
4073             run_tab[rle_index++]=run;
4074             run=0;
4075
4076             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4077         }else{
4078             run++;
4079         }
4080     }
4081 #ifdef REFINE_STATS
4082 if(last_non_zero>0){
4083 STOP_TIMER("init rem[]")
4084 }
4085 }
4086
4087 {START_TIMER
4088 #endif
4089     for(;;){
4090         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4091         int best_coeff=0;
4092         int best_change=0;
4093         int run2, best_unquant_change=0, analyze_gradient;
4094 #ifdef REFINE_STATS
4095 {START_TIMER
4096 #endif
4097         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4098
4099         if(analyze_gradient){
4100 #ifdef REFINE_STATS
4101 {START_TIMER
4102 #endif
4103             for(i=0; i<64; i++){
4104                 int w= weight[i];
4105
4106                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4107             }
4108 #ifdef REFINE_STATS
4109 STOP_TIMER("rem*w*w")}
4110 {START_TIMER
4111 #endif
4112             s->fdsp.fdct(d1);
4113 #ifdef REFINE_STATS
4114 STOP_TIMER("dct")}
4115 #endif
4116         }
4117
4118         if(start_i){
4119             const int level= block[0];
4120             int change, old_coeff;
4121
4122             assert(s->mb_intra);
4123
4124             old_coeff= q*level;
4125
4126             for(change=-1; change<=1; change+=2){
4127                 int new_level= level + change;
4128                 int score, new_coeff;
4129
4130                 new_coeff= q*new_level;
4131                 if(new_coeff >= 2048 || new_coeff < 0)
4132                     continue;
4133
4134                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4135                                                   new_coeff - old_coeff);
4136                 if(score<best_score){
4137                     best_score= score;
4138                     best_coeff= 0;
4139                     best_change= change;
4140                     best_unquant_change= new_coeff - old_coeff;
4141                 }
4142             }
4143         }
4144
4145         run=0;
4146         rle_index=0;
4147         run2= run_tab[rle_index++];
4148         prev_level=0;
4149         prev_run=0;
4150
4151         for(i=start_i; i<64; i++){
4152             int j= perm_scantable[i];
4153             const int level= block[j];
4154             int change, old_coeff;
4155
4156             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4157                 break;
4158
4159             if(level){
4160                 if(level<0) old_coeff= qmul*level - qadd;
4161                 else        old_coeff= qmul*level + qadd;
4162                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4163             }else{
4164                 old_coeff=0;
4165                 run2--;
4166                 assert(run2>=0 || i >= last_non_zero );
4167             }
4168
4169             for(change=-1; change<=1; change+=2){
4170                 int new_level= level + change;
4171                 int score, new_coeff, unquant_change;
4172
4173                 score=0;
4174                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4175                    continue;
4176
4177                 if(new_level){
4178                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4179                     else            new_coeff= qmul*new_level + qadd;
4180                     if(new_coeff >= 2048 || new_coeff <= -2048)
4181                         continue;
4182                     //FIXME check for overflow
4183
4184                     if(level){
4185                         if(level < 63 && level > -63){
4186                             if(i < last_non_zero)
4187                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4188                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4189                             else
4190                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4191                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4192                         }
4193                     }else{
4194                         assert(FFABS(new_level)==1);
4195
4196                         if(analyze_gradient){
4197                             int g= d1[ scantable[i] ];
4198                             if(g && (g^new_level) >= 0)
4199                                 continue;
4200                         }
4201
4202                         if(i < last_non_zero){
4203                             int next_i= i + run2 + 1;
4204                             int next_level= block[ perm_scantable[next_i] ] + 64;
4205
4206                             if(next_level&(~127))
4207                                 next_level= 0;
4208
4209                             if(next_i < last_non_zero)
4210                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4211                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4212                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4213                             else
4214                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4215                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4216                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4217                         }else{
4218                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4219                             if(prev_level){
4220                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4221                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4222                             }
4223                         }
4224                     }
4225                 }else{
4226                     new_coeff=0;
4227                     assert(FFABS(level)==1);
4228
4229                     if(i < last_non_zero){
4230                         int next_i= i + run2 + 1;
4231                         int next_level= block[ perm_scantable[next_i] ] + 64;
4232
4233                         if(next_level&(~127))
4234                             next_level= 0;
4235
4236                         if(next_i < last_non_zero)
4237                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4238                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4239                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4240                         else
4241                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4242                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4243                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4244                     }else{
4245                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4246                         if(prev_level){
4247                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4248                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4249                         }
4250                     }
4251                 }
4252
4253                 score *= lambda;
4254
4255                 unquant_change= new_coeff - old_coeff;
4256                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4257
4258                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4259                                                    unquant_change);
4260                 if(score<best_score){
4261                     best_score= score;
4262                     best_coeff= i;
4263                     best_change= change;
4264                     best_unquant_change= unquant_change;
4265                 }
4266             }
4267             if(level){
4268                 prev_level= level + 64;
4269                 if(prev_level&(~127))
4270                     prev_level= 0;
4271                 prev_run= run;
4272                 run=0;
4273             }else{
4274                 run++;
4275             }
4276         }
4277 #ifdef REFINE_STATS
4278 STOP_TIMER("iterative step")}
4279 #endif
4280
4281         if(best_change){
4282             int j= perm_scantable[ best_coeff ];
4283
4284             block[j] += best_change;
4285
4286             if(best_coeff > last_non_zero){
4287                 last_non_zero= best_coeff;
4288                 assert(block[j]);
4289 #ifdef REFINE_STATS
4290 after_last++;
4291 #endif
4292             }else{
4293 #ifdef REFINE_STATS
4294 if(block[j]){
4295     if(block[j] - best_change){
4296         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4297             raise++;
4298         }else{
4299             lower++;
4300         }
4301     }else{
4302         from_zero++;
4303     }
4304 }else{
4305     to_zero++;
4306 }
4307 #endif
4308                 for(; last_non_zero>=start_i; last_non_zero--){
4309                     if(block[perm_scantable[last_non_zero]])
4310                         break;
4311                 }
4312             }
4313 #ifdef REFINE_STATS
4314 count++;
4315 if(256*256*256*64 % count == 0){
4316     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4317 }
4318 #endif
4319             run=0;
4320             rle_index=0;
4321             for(i=start_i; i<=last_non_zero; i++){
4322                 int j= perm_scantable[i];
4323                 const int level= block[j];
4324
4325                  if(level){
4326                      run_tab[rle_index++]=run;
4327                      run=0;
4328                  }else{
4329                      run++;
4330                  }
4331             }
4332
4333             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4334         }else{
4335             break;
4336         }
4337     }
4338 #ifdef REFINE_STATS
4339 if(last_non_zero>0){
4340 STOP_TIMER("iterative search")
4341 }
4342 }
4343 #endif
4344
4345     return last_non_zero;
4346 }
4347
/**
 * Permute an 8x8 block according to permutation.
 * @param block the block which will be permuted according to
 *              the given permutation vector
 * @param permutation the permutation vector
 * @param last the last non zero coefficient in scantable order, used to
 *             speed the permutation up
 * @param scantable the used scantable, this is only used to speed the
 *                  permutation up, the block is not (inverse) permutated
 *                  to scantable order!
 */
static void block_permute(int16_t *block, uint8_t *permutation,
                          const uint8_t *scantable, int last)
{
    int16_t saved[64];
    int pos;

    /* nothing to move when at most the first scan position is used */
    if (last <= 0)
        return;
    //FIXME it is ok but not clean and might fail for some permutations
    // if (permutation[1] == 1)
    // return;

    /* first pass: stash every coefficient up to 'last' and clear its slot */
    pos = 0;
    while (pos <= last) {
        const int src = scantable[pos];

        saved[src] = block[src];
        block[src] = 0;
        pos++;
    }

    /* second pass: write each stashed coefficient to its permuted slot */
    pos = 0;
    while (pos <= last) {
        const int src = scantable[pos];

        block[permutation[src]] = saved[src];
        pos++;
    }
}
4383
4384 int ff_dct_quantize_c(MpegEncContext *s,
4385                         int16_t *block, int n,
4386                         int qscale, int *overflow)
4387 {
4388     int i, j, level, last_non_zero, q, start_i;
4389     const int *qmat;
4390     const uint8_t *scantable= s->intra_scantable.scantable;
4391     int bias;
4392     int max=0;
4393     unsigned int threshold1, threshold2;
4394
4395     s->fdsp.fdct(block);
4396
4397     if(s->dct_error_sum)
4398         s->denoise_dct(s, block);
4399
4400     if (s->mb_intra) {
4401         if (!s->h263_aic) {
4402             if (n < 4)
4403                 q = s->y_dc_scale;
4404             else
4405                 q = s->c_dc_scale;
4406             q = q << 3;
4407         } else
4408             /* For AIC we skip quant/dequant of INTRADC */
4409             q = 1 << 3;
4410
4411         /* note: block[0] is assumed to be positive */
4412         block[0] = (block[0] + (q >> 1)) / q;
4413         start_i = 1;
4414         last_non_zero = 0;
4415         qmat = s->q_intra_matrix[qscale];
4416         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4417     } else {
4418         start_i = 0;
4419         last_non_zero = -1;
4420         qmat = s->q_inter_matrix[qscale];
4421         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4422     }
4423     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4424     threshold2= (threshold1<<1);
4425     for(i=63;i>=start_i;i--) {
4426         j = scantable[i];
4427         level = block[j] * qmat[j];
4428
4429         if(((unsigned)(level+threshold1))>threshold2){
4430             last_non_zero = i;
4431             break;
4432         }else{
4433             block[j]=0;
4434         }
4435     }
4436     for(i=start_i; i<=last_non_zero; i++) {
4437         j = scantable[i];
4438         level = block[j] * qmat[j];
4439
4440 //        if(   bias+level >= (1<<QMAT_SHIFT)
4441 //           || bias-level >= (1<<QMAT_SHIFT)){
4442         if(((unsigned)(level+threshold1))>threshold2){
4443             if(level>0){
4444                 level= (bias + level)>>QMAT_SHIFT;
4445                 block[j]= level;
4446             }else{
4447                 level= (bias - level)>>QMAT_SHIFT;
4448                 block[j]= -level;
4449             }
4450             max |=level;
4451         }else{
4452             block[j]=0;
4453         }
4454     }
4455     *overflow= s->max_qcoeff < max; //overflow might have happened
4456
4457     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4458     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4459         block_permute(block, s->idsp.idct_permutation,
4460                       scantable, last_non_zero);
4461
4462     return last_non_zero;
4463 }
4464
4465 #define OFFSET(x) offsetof(MpegEncContext, x)
4466 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4467 static const AVOption h263_options[] = {
4468     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4469     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4470     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4471     FF_MPV_COMMON_OPTS
4472     { NULL },
4473 };
4474
4475 static const AVClass h263_class = {
4476     .class_name = "H.263 encoder",
4477     .item_name  = av_default_item_name,
4478     .option     = h263_options,
4479     .version    = LIBAVUTIL_VERSION_INT,
4480 };
4481
4482 AVCodec ff_h263_encoder = {
4483     .name           = "h263",
4484     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4485     .type           = AVMEDIA_TYPE_VIDEO,
4486     .id             = AV_CODEC_ID_H263,
4487     .priv_data_size = sizeof(MpegEncContext),
4488     .init           = ff_mpv_encode_init,
4489     .encode2        = ff_mpv_encode_picture,
4490     .close          = ff_mpv_encode_end,
4491     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4492     .priv_class     = &h263_class,
4493 };
4494
4495 static const AVOption h263p_options[] = {
4496     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4497     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4498     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4499     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4500     FF_MPV_COMMON_OPTS
4501     { NULL },
4502 };
4503 static const AVClass h263p_class = {
4504     .class_name = "H.263p encoder",
4505     .item_name  = av_default_item_name,
4506     .option     = h263p_options,
4507     .version    = LIBAVUTIL_VERSION_INT,
4508 };
4509
4510 AVCodec ff_h263p_encoder = {
4511     .name           = "h263p",
4512     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4513     .type           = AVMEDIA_TYPE_VIDEO,
4514     .id             = AV_CODEC_ID_H263P,
4515     .priv_data_size = sizeof(MpegEncContext),
4516     .init           = ff_mpv_encode_init,
4517     .encode2        = ff_mpv_encode_picture,
4518     .close          = ff_mpv_encode_end,
4519     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
4520     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4521     .priv_class     = &h263p_class,
4522 };
4523
4524 static const AVClass msmpeg4v2_class = {
4525     .class_name = "msmpeg4v2 encoder",
4526     .item_name  = av_default_item_name,
4527     .option     = ff_mpv_generic_options,
4528     .version    = LIBAVUTIL_VERSION_INT,
4529 };
4530
4531 AVCodec ff_msmpeg4v2_encoder = {
4532     .name           = "msmpeg4v2",
4533     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4534     .type           = AVMEDIA_TYPE_VIDEO,
4535     .id             = AV_CODEC_ID_MSMPEG4V2,
4536     .priv_data_size = sizeof(MpegEncContext),
4537     .init           = ff_mpv_encode_init,
4538     .encode2        = ff_mpv_encode_picture,
4539     .close          = ff_mpv_encode_end,
4540     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4541     .priv_class     = &msmpeg4v2_class,
4542 };
4543
4544 static const AVClass msmpeg4v3_class = {
4545     .class_name = "msmpeg4v3 encoder",
4546     .item_name  = av_default_item_name,
4547     .option     = ff_mpv_generic_options,
4548     .version    = LIBAVUTIL_VERSION_INT,
4549 };
4550
4551 AVCodec ff_msmpeg4v3_encoder = {
4552     .name           = "msmpeg4",
4553     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4554     .type           = AVMEDIA_TYPE_VIDEO,
4555     .id             = AV_CODEC_ID_MSMPEG4V3,
4556     .priv_data_size = sizeof(MpegEncContext),
4557     .init           = ff_mpv_encode_init,
4558     .encode2        = ff_mpv_encode_picture,
4559     .close          = ff_mpv_encode_end,
4560     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4561     .priv_class     = &msmpeg4v3_class,
4562 };
4563
4564 static const AVClass wmv1_class = {
4565     .class_name = "wmv1 encoder",
4566     .item_name  = av_default_item_name,
4567     .option     = ff_mpv_generic_options,
4568     .version    = LIBAVUTIL_VERSION_INT,
4569 };
4570
4571 AVCodec ff_wmv1_encoder = {
4572     .name           = "wmv1",
4573     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4574     .type           = AVMEDIA_TYPE_VIDEO,
4575     .id             = AV_CODEC_ID_WMV1,
4576     .priv_data_size = sizeof(MpegEncContext),
4577     .init           = ff_mpv_encode_init,
4578     .encode2        = ff_mpv_encode_picture,
4579     .close          = ff_mpv_encode_end,
4580     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4581     .priv_class     = &wmv1_class,
4582 };