git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of Libav.
   9  *
  10  * Libav is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * Libav is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with Libav; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "idctdsp.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "mpegvideodata.h"
  44 #include "h261.h"
  45 #include "h263.h"
  46 #include "h263data.h"
  47 #include "mjpegenc_common.h"
  48 #include "mathops.h"
  49 #include "mpegutils.h"
  50 #include "mjpegenc.h"
  51 #include "msmpeg4.h"
  52 #include "pixblockdsp.h"
  53 #include "qpeldsp.h"
  54 #include "faandct.h"
  55 #include "thread.h"
  56 #include "aandcttab.h"
  57 #include "flv.h"
  58 #include "mpeg4video.h"
  59 #include "internal.h"
  60 #include "bytestream.h"
  61 #include "wmv2.h"
  62 #include "rv10.h"
  63 #include <limits.h>
  64
  65 #define QUANT_BIAS_SHIFT 8
  66
  67 #define QMAT_SHIFT_MMX 16
  68 #define QMAT_SHIFT 22
  69
  70 static int encode_picture(MpegEncContext *s, int picture_number);
  71 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
  72 static int sse_mb(MpegEncContext *s);
  73 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
  74 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
  75
  76 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
  77 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
  78
  79 const AVOption ff_mpv_generic_options[] = {
  80     FF_MPV_COMMON_OPTS
  81     { NULL },
  82 };
  83
  84 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
  85                        uint16_t (*qmat16)[2][64],
  86                        const uint16_t *quant_matrix,
  87                        int bias, int qmin, int qmax, int intra)
  88 {
  89     FDCTDSPContext *fdsp = &s->fdsp;
  90     int qscale;
  91     int shift = 0;
  92
  93     for (qscale = qmin; qscale <= qmax; qscale++) {
  94         int i;
  95         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
  96 #if CONFIG_FAANDCT
  97             fdsp->fdct == ff_faandct            ||
  98 #endif /* CONFIG_FAANDCT */
  99             fdsp->fdct == ff_jpeg_fdct_islow_10) {
 100             for (i = 0; i < 64; i++) {
 101                 const int j = s->idsp.idct_permutation[i];
 102                 int64_t den = (int64_t) qscale * quant_matrix[j];
 103                 /* 16 <= qscale * quant_matrix[i] <= 7905
 104                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 105                  *             19952 <=              x  <= 249205026
 106                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 107                  *           3444240 >= (1 << 36) / (x) >= 275 */
 108
 109                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 110             }
 111         } else if (fdsp->fdct == ff_fdct_ifast) {
 112             for (i = 0; i < 64; i++) {
 113                 const int j = s->idsp.idct_permutation[i];
 114                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
 115                 /* 16 <= qscale * quant_matrix[i] <= 7905
 116                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 117                  *             19952 <=              x  <= 249205026
 118                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 119                  *           3444240 >= (1 << 36) / (x) >= 275 */
 120
 121                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
 122             }
 123         } else {
 124             for (i = 0; i < 64; i++) {
 125                 const int j = s->idsp.idct_permutation[i];
 126                 int64_t den = (int64_t) qscale * quant_matrix[j];
 127                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 128                  * Assume x = qscale * quant_matrix[i]
 129                  * So             16 <=              x  <= 7905
 130                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 131                  * so          32768 >= (1 << 19) / (x) >= 67 */
 132                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 133                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 134                 //                    (qscale * quant_matrix[i]);
 135                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
 136
 137                 if (qmat16[qscale][0][i] == 0 ||
 138                     qmat16[qscale][0][i] == 128 * 256)
 139                     qmat16[qscale][0][i] = 128 * 256 - 1;
 140                 qmat16[qscale][1][i] =
 141                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 142                                 qmat16[qscale][0][i]);
 143             }
 144         }
 145
 146         for (i = intra; i < 64; i++) {
 147             int64_t max = 8191;
 148             if (fdsp->fdct == ff_fdct_ifast) {
 149                 max = (8191LL * ff_aanscales[i]) >> 14;
 150             }
 151             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 152                 shift++;
 153             }
 154         }
 155     }
 156     if (shift) {
 157         av_log(NULL, AV_LOG_INFO,
 158                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 159                QMAT_SHIFT - shift);
 160     }
 161 }
 162
 163 static inline void update_qscale(MpegEncContext *s)
 164 {
 165     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 166                 (FF_LAMBDA_SHIFT + 7);
 167     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 168
 169     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 170                  FF_LAMBDA_SHIFT;
 171 }
 172
 173 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 174 {
 175     int i;
 176
 177     if (matrix) {
 178         put_bits(pb, 1, 1);
 179         for (i = 0; i < 64; i++) {
 180             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 181         }
 182     } else
 183         put_bits(pb, 1, 0);
 184 }
 185
 186 /**
 187  * init s->current_picture.qscale_table from s->lambda_table
 188  */
 189 void ff_init_qscale_tab(MpegEncContext *s)
 190 {
 191     int8_t * const qscale_table = s->current_picture.qscale_table;
 192     int i;
 193
 194     for (i = 0; i < s->mb_num; i++) {
 195         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 196         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 197         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 198                                                   s->avctx->qmax);
 199     }
 200 }
 201
 202 static void update_duplicate_context_after_me(MpegEncContext *dst,
 203                                               MpegEncContext *src)
 204 {
 205 #define COPY(a) dst->a= src->a
 206     COPY(pict_type);
 207     COPY(current_picture);
 208     COPY(f_code);
 209     COPY(b_code);
 210     COPY(qscale);
 211     COPY(lambda);
 212     COPY(lambda2);
 213     COPY(picture_in_gop_number);
 214     COPY(gop_picture_number);
 215     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 216     COPY(progressive_frame);    // FIXME don't set in encode_header
 217     COPY(partitioned_frame);    // FIXME don't set in encode_header
 218 #undef COPY
 219 }
 220
 221 /**
 222  * Set the given MpegEncContext to defaults for encoding.
 223  * the changed fields will not depend upon the prior state of the MpegEncContext.
 224  */
 225 static void mpv_encode_defaults(MpegEncContext *s)
 226 {
 227     int i;
 228     ff_mpv_common_defaults(s);
 229
 230     for (i = -16; i < 16; i++) {
 231         default_fcode_tab[i + MAX_MV] = 1;
 232     }
 233     s->me.mv_penalty = default_mv_penalty;
 234     s->fcode_tab     = default_fcode_tab;
 235
 236     s->input_picture_number  = 0;
 237     s->picture_in_gop_number = 0;
 238 }
 239
 240 /* init video encoder */
 241 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
 242 {
 243     MpegEncContext *s = avctx->priv_data;
 244     AVCPBProperties *cpb_props;
 245     int i, ret, format_supported;
 246
 247     mpv_encode_defaults(s);
 248
 249     switch (avctx->codec_id) {
 250     case AV_CODEC_ID_MPEG2VIDEO:
 251         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 252             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 253             av_log(avctx, AV_LOG_ERROR,
 254                    "only YUV420 and YUV422 are supported\n");
 255             return -1;
 256         }
 257         break;
 258     case AV_CODEC_ID_MJPEG:
 259         format_supported = 0;
 260         /* JPEG color space */
 261         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
 262             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
 263             (avctx->color_range == AVCOL_RANGE_JPEG &&
 264              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 265               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
 266             format_supported = 1;
 267         /* MPEG color space */
 268         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
 269                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 270                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
 271             format_supported = 1;
 272
 273         if (!format_supported) {
 274             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 275             return -1;
 276         }
 277         break;
 278     default:
 279         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 280             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 281             return -1;
 282         }
 283     }
 284
 285     switch (avctx->pix_fmt) {
 286     case AV_PIX_FMT_YUVJ422P:
 287     case AV_PIX_FMT_YUV422P:
 288         s->chroma_format = CHROMA_422;
 289         break;
 290     case AV_PIX_FMT_YUVJ420P:
 291     case AV_PIX_FMT_YUV420P:
 292     default:
 293         s->chroma_format = CHROMA_420;
 294         break;
 295     }
 296
 297 #if FF_API_PRIVATE_OPT
 298 FF_DISABLE_DEPRECATION_WARNINGS
 299     if (avctx->rtp_payload_size)
 300         s->rtp_payload_size = avctx->rtp_payload_size;
 301     if (avctx->me_penalty_compensation)
 302         s->me_penalty_compensation = avctx->me_penalty_compensation;
 303     if (avctx->pre_me)
 304         s->me_pre = avctx->pre_me;
 305 FF_ENABLE_DEPRECATION_WARNINGS
 306 #endif
 307
 308     s->bit_rate = avctx->bit_rate;
 309     s->width    = avctx->width;
 310     s->height   = avctx->height;
 311     if (avctx->gop_size > 600 &&
 312         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 313         av_log(avctx, AV_LOG_ERROR,
 314                "Warning keyframe interval too large! reducing it ...\n");
 315         avctx->gop_size = 600;
 316     }
 317     s->gop_size     = avctx->gop_size;
 318     s->avctx        = avctx;
 319     if (avctx->max_b_frames > MAX_B_FRAMES) {
 320         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 321                "is %d.\n", MAX_B_FRAMES);
 322     }
 323     s->max_b_frames = avctx->max_b_frames;
 324     s->codec_id     = avctx->codec->id;
 325     s->strict_std_compliance = avctx->strict_std_compliance;
 326     s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
 327     s->rtp_mode           = !!s->rtp_payload_size;
 328     s->intra_dc_precision = avctx->intra_dc_precision;
 329     s->user_specified_pts = AV_NOPTS_VALUE;
 330
 331     if (s->gop_size <= 1) {
 332         s->intra_only = 1;
 333         s->gop_size   = 12;
 334     } else {
 335         s->intra_only = 0;
 336     }
 337
 338 #if FF_API_MOTION_EST
 339 FF_DISABLE_DEPRECATION_WARNINGS
 340     s->me_method = avctx->me_method;
 341 FF_ENABLE_DEPRECATION_WARNINGS
 342 #endif
 343
 344     /* Fixed QSCALE */
 345     s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
 346
 347 #if FF_API_MPV_OPT
 348     FF_DISABLE_DEPRECATION_WARNINGS
 349     if (avctx->border_masking != 0.0)
 350         s->border_masking = avctx->border_masking;
 351     FF_ENABLE_DEPRECATION_WARNINGS
 352 #endif
 353
 354     s->adaptive_quant = (s->avctx->lumi_masking ||
 355                          s->avctx->dark_masking ||
 356                          s->avctx->temporal_cplx_masking ||
 357                          s->avctx->spatial_cplx_masking  ||
 358                          s->avctx->p_masking      ||
 359                          s->border_masking ||
 360                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 361                         !s->fixed_qscale;
 362
 363     s->loop_filter = !!(s->avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
 364
 365     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 366         av_log(avctx, AV_LOG_ERROR,
 367                "a vbv buffer size is needed, "
 368                "for encoding with a maximum bitrate\n");
 369         return -1;
 370     }
 371
 372     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 373         av_log(avctx, AV_LOG_INFO,
 374                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 375     }
 376
 377     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 378         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 379         return -1;
 380     }
 381
 382     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 383         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
 384         return -1;
 385     }
 386
 387     if (avctx->rc_max_rate &&
 388         avctx->rc_max_rate == avctx->bit_rate &&
 389         avctx->rc_max_rate != avctx->rc_min_rate) {
 390         av_log(avctx, AV_LOG_INFO,
 391                "impossible bitrate constraints, this will fail\n");
 392     }
 393
 394     if (avctx->rc_buffer_size &&
 395         avctx->bit_rate * (int64_t)avctx->time_base.num >
 396             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 397         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 398         return -1;
 399     }
 400
 401     if (!s->fixed_qscale &&
 402         avctx->bit_rate * av_q2d(avctx->time_base) >
 403             avctx->bit_rate_tolerance) {
 404         av_log(avctx, AV_LOG_ERROR,
 405                "bitrate tolerance too small for bitrate\n");
 406         return -1;
 407     }
 408
 409     if (s->avctx->rc_max_rate &&
 410         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 411         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 412          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 413         90000LL * (avctx->rc_buffer_size - 1) >
 414             s->avctx->rc_max_rate * 0xFFFFLL) {
 415         av_log(avctx, AV_LOG_INFO,
 416                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 417                "specified vbv buffer is too large for the given bitrate!\n");
 418     }
 419
 420     if ((s->avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
 421         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 422         s->codec_id != AV_CODEC_ID_FLV1) {
 423         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 424         return -1;
 425     }
 426
 427     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 428         av_log(avctx, AV_LOG_ERROR,
 429                "OBMC is only supported with simple mb decision\n");
 430         return -1;
 431     }
 432
 433     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 434         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 435         return -1;
 436     }
 437
 438     if (s->max_b_frames                    &&
 439         s->codec_id != AV_CODEC_ID_MPEG4      &&
 440         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 441         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 442         av_log(avctx, AV_LOG_ERROR, "B-frames not supported by codec\n");
 443         return -1;
 444     }
 445
 446     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 447          s->codec_id == AV_CODEC_ID_H263  ||
 448          s->codec_id == AV_CODEC_ID_H263P) &&
 449         (avctx->sample_aspect_ratio.num > 255 ||
 450          avctx->sample_aspect_ratio.den > 255)) {
 451         av_log(avctx, AV_LOG_ERROR,
 452                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
 453                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 454         return -1;
 455     }
 456
 457     if ((s->avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
 458         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 459         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 460         return -1;
 461     }
 462
 463 #if FF_API_PRIVATE_OPT
 464     FF_DISABLE_DEPRECATION_WARNINGS
 465     if (avctx->mpeg_quant)
 466         s->mpeg_quant = avctx->mpeg_quant;
 467     FF_ENABLE_DEPRECATION_WARNINGS
 468 #endif
 469
 470     // FIXME mpeg2 uses that too
 471     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
 472         av_log(avctx, AV_LOG_ERROR,
 473                "mpeg2 style quantization not supported by codec\n");
 474         return -1;
 475     }
 476
 477     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 478         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 479         return -1;
 480     }
 481
 482     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 483         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 484         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 485         return -1;
 486     }
 487
 488 #if FF_API_PRIVATE_OPT
 489 FF_DISABLE_DEPRECATION_WARNINGS
 490     if (avctx->scenechange_threshold)
 491         s->scenechange_threshold = avctx->scenechange_threshold;
 492 FF_ENABLE_DEPRECATION_WARNINGS
 493 #endif
 494
 495     if (s->scenechange_threshold < 1000000000 &&
 496         (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
 497         av_log(avctx, AV_LOG_ERROR,
 498                "closed gop with scene change detection are not supported yet, "
 499                "set threshold to 1000000000\n");
 500         return -1;
 501     }
 502
 503     if (s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
 504         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 505             av_log(avctx, AV_LOG_ERROR,
 506                   "low delay forcing is only available for mpeg2\n");
 507             return -1;
 508         }
 509         if (s->max_b_frames != 0) {
 510             av_log(avctx, AV_LOG_ERROR,
 511                    "B-frames cannot be used with low delay\n");
 512             return -1;
 513         }
 514     }
 515
 516     if (s->q_scale_type == 1) {
 517         if (avctx->qmax > 12) {
 518             av_log(avctx, AV_LOG_ERROR,
 519                    "non linear quant only supports qmax <= 12 currently\n");
 520             return -1;
 521         }
 522     }
 523
 524     if (avctx->slices > 1 &&
 525         (avctx->codec_id == AV_CODEC_ID_FLV1 || avctx->codec_id == AV_CODEC_ID_H261)) {
 526         av_log(avctx, AV_LOG_ERROR, "Multiple slices are not supported by this codec\n");
 527         return AVERROR(EINVAL);
 528     }
 529
 530     if (s->avctx->thread_count > 1         &&
 531         s->codec_id != AV_CODEC_ID_MPEG4      &&
 532         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 533         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 534         (s->codec_id != AV_CODEC_ID_H263P)) {
 535         av_log(avctx, AV_LOG_ERROR,
 536                "multi threaded encoding not supported by codec\n");
 537         return -1;
 538     }
 539
 540     if (s->avctx->thread_count < 1) {
 541         av_log(avctx, AV_LOG_ERROR,
 542                "automatic thread number detection not supported by codec,"
 543                "patch welcome\n");
 544         return -1;
 545     }
 546
 547     if (!avctx->time_base.den || !avctx->time_base.num) {
 548         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 549         return -1;
 550     }
 551
 552 #if FF_API_PRIVATE_OPT
 553 FF_DISABLE_DEPRECATION_WARNINGS
 554     if (avctx->b_frame_strategy)
 555         s->b_frame_strategy = avctx->b_frame_strategy;
 556     if (avctx->b_sensitivity != 40)
 557         s->b_sensitivity = avctx->b_sensitivity;
 558 FF_ENABLE_DEPRECATION_WARNINGS
 559 #endif
 560
 561     if (s->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
 562         av_log(avctx, AV_LOG_INFO,
 563                "notice: b_frame_strategy only affects the first pass\n");
 564         s->b_frame_strategy = 0;
 565     }
 566
 567     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 568     if (i > 1) {
 569         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 570         avctx->time_base.den /= i;
 571         avctx->time_base.num /= i;
 572         //return -1;
 573     }
 574
 575     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 576         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
 577         // (a + x * 3 / 8) / x
 578         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 579         s->inter_quant_bias = 0;
 580     } else {
 581         s->intra_quant_bias = 0;
 582         // (a - x / 4) / x
 583         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 584     }
 585
 586 #if FF_API_QUANT_BIAS
 587 FF_DISABLE_DEPRECATION_WARNINGS
 588     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 589         s->intra_quant_bias = avctx->intra_quant_bias;
 590     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 591         s->inter_quant_bias = avctx->inter_quant_bias;
 592 FF_ENABLE_DEPRECATION_WARNINGS
 593 #endif
 594
 595     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 596         s->avctx->time_base.den > (1 << 16) - 1) {
 597         av_log(avctx, AV_LOG_ERROR,
 598                "timebase %d/%d not supported by MPEG 4 standard, "
 599                "the maximum admitted value for the timebase denominator "
 600                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 601                (1 << 16) - 1);
 602         return -1;
 603     }
 604     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 605
 606     switch (avctx->codec->id) {
 607     case AV_CODEC_ID_MPEG1VIDEO:
 608         s->out_format = FMT_MPEG1;
 609         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
 610         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 611         break;
 612     case AV_CODEC_ID_MPEG2VIDEO:
 613         s->out_format = FMT_MPEG1;
 614         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
 615         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 616         s->rtp_mode   = 1;
 617         break;
 618     case AV_CODEC_ID_MJPEG:
 619         s->out_format = FMT_MJPEG;
 620         s->intra_only = 1; /* force intra only for jpeg */
 621         if (!CONFIG_MJPEG_ENCODER ||
 622             ff_mjpeg_encode_init(s) < 0)
 623             return -1;
 624         avctx->delay = 0;
 625         s->low_delay = 1;
 626         break;
 627     case AV_CODEC_ID_H261:
 628         if (!CONFIG_H261_ENCODER)
 629             return -1;
 630         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 631             av_log(avctx, AV_LOG_ERROR,
 632                    "The specified picture size of %dx%d is not valid for the "
 633                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 634                     s->width, s->height);
 635             return -1;
 636         }
 637         s->out_format = FMT_H261;
 638         avctx->delay  = 0;
 639         s->low_delay  = 1;
 640         s->rtp_mode   = 0; /* Sliced encoding not supported */
 641         break;
 642     case AV_CODEC_ID_H263:
 643         if (!CONFIG_H263_ENCODER)
 644         return -1;
 645         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 646                              s->width, s->height) == 8) {
 647             av_log(avctx, AV_LOG_INFO,
 648                    "The specified picture size of %dx%d is not valid for "
 649                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 650                    "352x288, 704x576, and 1408x1152."
 651                    "Try H.263+.\n", s->width, s->height);
 652             return -1;
 653         }
 654         s->out_format = FMT_H263;
 655         avctx->delay  = 0;
 656         s->low_delay  = 1;
 657         break;
 658     case AV_CODEC_ID_H263P:
 659         s->out_format = FMT_H263;
 660         s->h263_plus  = 1;
 661         /* Fx */
 662         s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
 663         s->modified_quant  = s->h263_aic;
 664         s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 665         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 666
 667         /* /Fx */
 668         /* These are just to be sure */
 669         avctx->delay = 0;
 670         s->low_delay = 1;
 671         break;
 672     case AV_CODEC_ID_FLV1:
 673         s->out_format      = FMT_H263;
 674         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 675         s->unrestricted_mv = 1;
 676         s->rtp_mode  = 0; /* don't allow GOB */
 677         avctx->delay = 0;
 678         s->low_delay = 1;
 679         break;
 680     case AV_CODEC_ID_RV10:
 681         s->out_format = FMT_H263;
 682         avctx->delay  = 0;
 683         s->low_delay  = 1;
 684         break;
 685     case AV_CODEC_ID_RV20:
 686         s->out_format      = FMT_H263;
 687         avctx->delay       = 0;
 688         s->low_delay       = 1;
 689         s->modified_quant  = 1;
 690         s->h263_aic        = 1;
 691         s->h263_plus       = 1;
 692         s->loop_filter     = 1;
 693         s->unrestricted_mv = 0;
 694         break;
 695     case AV_CODEC_ID_MPEG4:
 696         s->out_format      = FMT_H263;
 697         s->h263_pred       = 1;
 698         s->unrestricted_mv = 1;
 699         s->low_delay       = s->max_b_frames ? 0 : 1;
 700         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 701         break;
 702     case AV_CODEC_ID_MSMPEG4V2:
 703         s->out_format      = FMT_H263;
 704         s->h263_pred       = 1;
 705         s->unrestricted_mv = 1;
 706         s->msmpeg4_version = 2;
 707         avctx->delay       = 0;
 708         s->low_delay       = 1;
 709         break;
 710     case AV_CODEC_ID_MSMPEG4V3:
 711         s->out_format        = FMT_H263;
 712         s->h263_pred         = 1;
 713         s->unrestricted_mv   = 1;
 714         s->msmpeg4_version   = 3;
 715         s->flipflop_rounding = 1;
 716         avctx->delay         = 0;
 717         s->low_delay         = 1;
 718         break;
 719     case AV_CODEC_ID_WMV1:
 720         s->out_format        = FMT_H263;
 721         s->h263_pred         = 1;
 722         s->unrestricted_mv   = 1;
 723         s->msmpeg4_version   = 4;
 724         s->flipflop_rounding = 1;
 725         avctx->delay         = 0;
 726         s->low_delay         = 1;
 727         break;
 728     case AV_CODEC_ID_WMV2:
 729         s->out_format        = FMT_H263;
 730         s->h263_pred         = 1;
 731         s->unrestricted_mv   = 1;
 732         s->msmpeg4_version   = 5;
 733         s->flipflop_rounding = 1;
 734         avctx->delay         = 0;
 735         s->low_delay         = 1;
 736         break;
 737     default:
 738         return -1;
 739     }
 740
 741 #if FF_API_PRIVATE_OPT
 742     FF_DISABLE_DEPRECATION_WARNINGS
 743     if (avctx->noise_reduction)
 744         s->noise_reduction = avctx->noise_reduction;
 745     FF_ENABLE_DEPRECATION_WARNINGS
 746 #endif
 747
 748     avctx->has_b_frames = !s->low_delay;
 749
 750     s->encoding = 1;
 751
 752     s->progressive_frame    =
 753     s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
 754                                                 AV_CODEC_FLAG_INTERLACED_ME) ||
 755                                 s->alternate_scan);
 756
 757     /* init */
 758     ff_mpv_idct_init(s);
 759     if (ff_mpv_common_init(s) < 0)
 760         return -1;
 761
 762     if (ARCH_X86)
 763         ff_mpv_encode_init_x86(s);
 764
 765     ff_fdctdsp_init(&s->fdsp, avctx);
 766     ff_me_cmp_init(&s->mecc, avctx);
 767     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 768     ff_pixblockdsp_init(&s->pdsp, avctx);
 769     ff_qpeldsp_init(&s->qdsp);
 770
 771     if (s->msmpeg4_version) {
 772         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 773                           2 * 2 * (MAX_LEVEL + 1) *
 774                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 775     }
 776     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 777
 778     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 779     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 780     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 781     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 782     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 783                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 784     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 785                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 786
 787
 788     if (s->noise_reduction) {
 789         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 790                           2 * 64 * sizeof(uint16_t), fail);
 791     }
 792
 793     if (CONFIG_H263_ENCODER)
 794         ff_h263dsp_init(&s->h263dsp);
 795     if (!s->dct_quantize)
 796         s->dct_quantize = ff_dct_quantize_c;
 797     if (!s->denoise_dct)
 798         s->denoise_dct  = denoise_dct_c;
 799     s->fast_dct_quantize = s->dct_quantize;
 800     if (avctx->trellis)
 801         s->dct_quantize  = dct_quantize_trellis_c;
 802
 803     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 804         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 805
 806     if (s->slice_context_count > 1) {
 807         s->rtp_mode = 1;
 808
 809         if (avctx->codec_id == AV_CODEC_ID_H263 || avctx->codec_id == AV_CODEC_ID_H263P)
 810             s->h263_slice_structured = 1;
 811     }
 812
 813     s->quant_precision = 5;
 814
 815 #if FF_API_PRIVATE_OPT
 816 FF_DISABLE_DEPRECATION_WARNINGS
 817     if (avctx->frame_skip_threshold)
 818         s->frame_skip_threshold = avctx->frame_skip_threshold;
 819     if (avctx->frame_skip_factor)
 820         s->frame_skip_factor = avctx->frame_skip_factor;
 821     if (avctx->frame_skip_exp)
 822         s->frame_skip_exp = avctx->frame_skip_exp;
 823     if (avctx->frame_skip_cmp != FF_CMP_DCTMAX)
 824         s->frame_skip_cmp = avctx->frame_skip_cmp;
 825 FF_ENABLE_DEPRECATION_WARNINGS
 826 #endif
 827
 828     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
 829     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->frame_skip_cmp);
 830
 831     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 832         ff_h261_encode_init(s);
 833     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 834         ff_h263_encode_init(s);
 835     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 836         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
 837             return ret;
 838     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 839         && s->out_format == FMT_MPEG1)
 840         ff_mpeg1_encode_init(s);
 841
 842     /* init q matrix */
 843     for (i = 0; i < 64; i++) {
 844         int j = s->idsp.idct_permutation[i];
 845         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 846             s->mpeg_quant) {
 847             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 848             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 849         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 850             s->intra_matrix[j] =
 851             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 852         } else {
 853             /* MPEG-1/2 */
 854             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 855             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 856         }
 857         if (s->avctx->intra_matrix)
 858             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 859         if (s->avctx->inter_matrix)
 860             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 861     }
 862
 863     /* precompute matrix */
 864     /* for mjpeg, we do include qscale in the matrix */
 865     if (s->out_format != FMT_MJPEG) {
 866         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
 867                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 868                           31, 1);
 869         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
 870                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 871                           31, 0);
 872     }
 873
 874     if (ff_rate_control_init(s) < 0)
 875         return -1;
 876
 877 #if FF_API_ERROR_RATE
 878     FF_DISABLE_DEPRECATION_WARNINGS
 879     if (avctx->error_rate)
 880         s->error_rate = avctx->error_rate;
 881     FF_ENABLE_DEPRECATION_WARNINGS;
 882 #endif
 883
 884 #if FF_API_NORMALIZE_AQP
 885     FF_DISABLE_DEPRECATION_WARNINGS
 886     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 887         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 888     FF_ENABLE_DEPRECATION_WARNINGS;
 889 #endif
 890
 891 #if FF_API_MV0
 892     FF_DISABLE_DEPRECATION_WARNINGS
 893     if (avctx->flags & CODEC_FLAG_MV0)
 894         s->mpv_flags |= FF_MPV_FLAG_MV0;
 895     FF_ENABLE_DEPRECATION_WARNINGS
 896 #endif
 897
 898 #if FF_API_MPV_OPT
 899     FF_DISABLE_DEPRECATION_WARNINGS
 900     if (avctx->rc_qsquish != 0.0)
 901         s->rc_qsquish = avctx->rc_qsquish;
 902     if (avctx->rc_qmod_amp != 0.0)
 903         s->rc_qmod_amp = avctx->rc_qmod_amp;
 904     if (avctx->rc_qmod_freq)
 905         s->rc_qmod_freq = avctx->rc_qmod_freq;
 906     if (avctx->rc_buffer_aggressivity != 1.0)
 907         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
 908     if (avctx->rc_initial_cplx != 0.0)
 909         s->rc_initial_cplx = avctx->rc_initial_cplx;
 910     if (avctx->lmin)
 911         s->lmin = avctx->lmin;
 912     if (avctx->lmax)
 913         s->lmax = avctx->lmax;
 914
 915     if (avctx->rc_eq) {
 916         av_freep(&s->rc_eq);
 917         s->rc_eq = av_strdup(avctx->rc_eq);
 918         if (!s->rc_eq)
 919             return AVERROR(ENOMEM);
 920     }
 921     FF_ENABLE_DEPRECATION_WARNINGS
 922 #endif
 923
 924 #if FF_API_PRIVATE_OPT
 925     FF_DISABLE_DEPRECATION_WARNINGS
 926     if (avctx->brd_scale)
 927         s->brd_scale = avctx->brd_scale;
 928
 929     if (avctx->prediction_method)
 930         s->pred = avctx->prediction_method + 1;
 931     FF_ENABLE_DEPRECATION_WARNINGS
 932 #endif
 933
 934     if (s->b_frame_strategy == 2) {
 935         for (i = 0; i < s->max_b_frames + 2; i++) {
 936             s->tmp_frames[i] = av_frame_alloc();
 937             if (!s->tmp_frames[i])
 938                 return AVERROR(ENOMEM);
 939
 940             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 941             s->tmp_frames[i]->width  = s->width  >> s->brd_scale;
 942             s->tmp_frames[i]->height = s->height >> s->brd_scale;
 943
 944             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 945             if (ret < 0)
 946                 return ret;
 947         }
 948     }
 949
 950     cpb_props = ff_add_cpb_side_data(avctx);
 951     if (!cpb_props)
 952         return AVERROR(ENOMEM);
 953     cpb_props->max_bitrate = avctx->rc_max_rate;
 954     cpb_props->min_bitrate = avctx->rc_min_rate;
 955     cpb_props->avg_bitrate = avctx->bit_rate;
 956     cpb_props->buffer_size = avctx->rc_buffer_size;
 957
 958     return 0;
 959 fail:
 960     ff_mpv_encode_end(avctx);
 961     return AVERROR_UNKNOWN;
 962 }
 963
 964 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
 965 {
 966     MpegEncContext *s = avctx->priv_data;
 967     int i;
 968
 969     ff_rate_control_uninit(s);
 970
 971     ff_mpv_common_end(s);
 972     if (CONFIG_MJPEG_ENCODER &&
 973         s->out_format == FMT_MJPEG)
 974         ff_mjpeg_encode_close(s);
 975
 976     av_freep(&avctx->extradata);
 977
 978     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
 979         av_frame_free(&s->tmp_frames[i]);
 980
 981     ff_free_picture_tables(&s->new_picture);
 982     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
 983
 984     av_freep(&s->avctx->stats_out);
 985     av_freep(&s->ac_stats);
 986
 987     av_freep(&s->q_intra_matrix);
 988     av_freep(&s->q_inter_matrix);
 989     av_freep(&s->q_intra_matrix16);
 990     av_freep(&s->q_inter_matrix16);
 991     av_freep(&s->input_picture);
 992     av_freep(&s->reordered_input_picture);
 993     av_freep(&s->dct_offset);
 994
 995     return 0;
 996 }
 997
 998 static int get_sae(uint8_t *src, int ref, int stride)
 999 {
1000     int x,y;
1001     int acc = 0;
1002
1003     for (y = 0; y < 16; y++) {
1004         for (x = 0; x < 16; x++) {
1005             acc += FFABS(src[x + y * stride] - ref);
1006         }
1007     }
1008
1009     return acc;
1010 }
1011
1012 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1013                            uint8_t *ref, int stride)
1014 {
1015     int x, y, w, h;
1016     int acc = 0;
1017
1018     w = s->width  & ~15;
1019     h = s->height & ~15;
1020
1021     for (y = 0; y < h; y += 16) {
1022         for (x = 0; x < w; x += 16) {
1023             int offset = x + y * stride;
1024             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1025                                       stride, 16);
1026             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1027             int sae  = get_sae(src + offset, mean, stride);
1028
1029             acc += sae + 500 < sad;
1030         }
1031     }
1032     return acc;
1033 }
1034
/**
 * Convenience wrapper around ff_alloc_picture() for the encoder.
 *
 * Forwards pic and shared unchanged and fills every other argument from the
 * encoder context (motion-estimation and scratch contexts, chroma shifts,
 * output format, macroblock strides/height and the line-size fields).
 * The constant 1 passed right after shared is interpreted by
 * ff_alloc_picture(); its meaning is not visible from this file section.
 *
 * @return the value returned by ff_alloc_picture(); callers in this file
 *         treat a negative value as failure
 */
static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
{
    return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
                            s->chroma_x_shift, s->chroma_y_shift, s->out_format,
                            s->mb_stride, s->mb_height, s->b8_stride,
                            &s->linesize, &s->uvlinesize);
}
1042
/**
 * Queue one user frame (or a flush request) into s->input_picture[].
 *
 * With a frame: derive its pts (validating it against the previous
 * user-specified pts, or guessing one when it is missing), pick an unused
 * Picture slot, then either reference the caller's buffers directly or copy
 * the three planes into a freshly allocated picture. Frame properties are
 * copied and display_picture_number / pts are stored on the internal frame.
 *
 * Without a frame (pic_arg == NULL): work out how far the queue has to be
 * shifted so that the first pending picture ends up in slot 0.
 *
 * In both cases the queue is then shifted by flush_offset and the new
 * picture (NULL when flushing) is stored at index encoding_delay.
 *
 * @param s       encoder context
 * @param pic_arg frame supplied by the caller, or NULL to flush
 * @return 0 on success; -1 if the timestamp does not increase; otherwise the
 *         negative value returned by the failing helper
 */
static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
{
    Picture *pic = NULL;
    int64_t pts;
    int i, display_picture_number = 0, ret;
    /* queue slot the new picture is written to: max_b_frames if non-zero,
     * else 0 in low-delay mode and 1 otherwise */
    int encoding_delay = s->max_b_frames ? s->max_b_frames
                                         : (s->low_delay ? 0 : 1);
    int flush_offset = 1;
    int direct = 1;

    if (pic_arg) {
        pts = pic_arg->pts;
        display_picture_number = s->input_picture_number++;

        if (pts != AV_NOPTS_VALUE) {
            if (s->user_specified_pts != AV_NOPTS_VALUE) {
                int64_t time = pts;
                int64_t last = s->user_specified_pts;

                /* timestamps must be strictly increasing */
                if (time <= last) {
                    av_log(s->avctx, AV_LOG_ERROR,
                           "Error, Invalid timestamp=%"PRId64", "
                           "last=%"PRId64"\n", pts, s->user_specified_pts);
                    return -1;
                }

                /* remember the distance between the first two pts values */
                if (!s->low_delay && display_picture_number == 1)
                    s->dts_delta = time - last;
            }
            s->user_specified_pts = pts;
        } else {
            if (s->user_specified_pts != AV_NOPTS_VALUE) {
                /* no pts on this frame: continue from the previous one */
                s->user_specified_pts =
                pts = s->user_specified_pts + 1;
                av_log(s->avctx, AV_LOG_INFO,
                       "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
                       pts);
            } else {
                /* no pts seen so far: fall back to the frame counter */
                pts = display_picture_number;
            }
        }

        /* the caller's buffers are only referenced directly when buf[0] is
         * set, all three line sizes match the encoder's, and both
         * dimensions are multiples of 16; otherwise the data is copied */
        if (!pic_arg->buf[0] ||
            pic_arg->linesize[0] != s->linesize ||
            pic_arg->linesize[1] != s->uvlinesize ||
            pic_arg->linesize[2] != s->uvlinesize)
            direct = 0;
        if ((s->width & 15) || (s->height & 15))
            direct = 0;

        ff_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
                pic_arg->linesize[1], s->linesize, s->uvlinesize);

        i = ff_find_unused_picture(s->avctx, s->picture, direct);
        if (i < 0)
            return i;

        pic = &s->picture[i];
        pic->reference = 3;

        if (direct) {
            if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
                return ret;
        }
        ret = alloc_picture(s, pic, direct);
        if (ret < 0)
            return ret;

        if (!direct) {
            if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
                pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
                pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
                // empty: the caller's planes already sit at the in-place
                // position inside this picture, nothing to copy
            } else {
                int h_chroma_shift, v_chroma_shift;
                av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
                                                 &h_chroma_shift,
                                                 &v_chroma_shift);

                /* copy plane by plane; planes 1 and 2 use the chroma
                 * line size and the chroma subsampling shifts */
                for (i = 0; i < 3; i++) {
                    int src_stride = pic_arg->linesize[i];
                    int dst_stride = i ? s->uvlinesize : s->linesize;
                    int h_shift = i ? h_chroma_shift : 0;
                    int v_shift = i ? v_chroma_shift : 0;
                    int w = s->width  >> h_shift;
                    int h = s->height >> v_shift;
                    uint8_t *src = pic_arg->data[i];
                    uint8_t *dst = pic->f->data[i];

                    /* the destination is shifted by INPLACE_OFFSET only
                     * when no rate-control buffer size is configured */
                    if (!s->avctx->rc_buffer_size)
                        dst += INPLACE_OFFSET;

                    if (src_stride == dst_stride)
                        memcpy(dst, src, src_stride * h);
                    else {
                        /* strides differ: copy w bytes per row */
                        int h2 = h;
                        uint8_t *dst2 = dst;
                        while (h2--) {
                            memcpy(dst2, src, w);
                            dst2 += dst_stride;
                            src += src_stride;
                        }
                    }
                    /* dimensions not a multiple of 16: extend the copied
                     * plane at the bottom edge */
                    if ((s->width & 15) || (s->height & 15)) {
                        s->mpvencdsp.draw_edges(dst, dst_stride,
                                                w, h,
                                                16 >> h_shift,
                                                16 >> v_shift,
                                                EDGE_BOTTOM);
                    }
                }
            }
        }
        ret = av_frame_copy_props(pic->f, pic_arg);
        if (ret < 0)
            return ret;

        pic->f->display_picture_number = display_picture_number;
        pic->f->pts = pts; // we set this here to avoid modifying pic_arg
    } else {
        /* Flushing: When we have not received enough input frames,
         * ensure s->input_picture[0] contains the first picture */
        for (flush_offset = 0; flush_offset < encoding_delay + 1; flush_offset++)
            if (s->input_picture[flush_offset])
                break;

        if (flush_offset <= 1)
            flush_offset = 1;
        else
            encoding_delay = encoding_delay - flush_offset + 1;
    }

    /* shift buffer entries */
    for (i = flush_offset; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
        s->input_picture[i - flush_offset] = s->input_picture[i];

    /* pic is NULL here when flushing */
    s->input_picture[encoding_delay] = (Picture*) pic;

    return 0;
}
1183
1184 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1185 {
1186     int x, y, plane;
1187     int score = 0;
1188     int64_t score64 = 0;
1189
1190     for (plane = 0; plane < 3; plane++) {
1191         const int stride = p->f->linesize[plane];
1192         const int bw = plane ? 1 : 2;
1193         for (y = 0; y < s->mb_height * bw; y++) {
1194             for (x = 0; x < s->mb_width * bw; x++) {
1195                 int off = p->shared ? 0 : 16;
1196                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1197                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1198                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1199
1200                 switch (s->frame_skip_exp) {
1201                 case 0: score    =  FFMAX(score, v);          break;
1202                 case 1: score   += FFABS(v);                  break;
1203                 case 2: score   += v * v;                     break;
1204                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1205                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1206                 }
1207             }
1208         }
1209     }
1210
1211     if (score)
1212         score64 = score;
1213
1214     if (score64 < s->frame_skip_threshold)
1215         return 1;
1216     if (score64 < ((s->frame_skip_factor * (int64_t) s->lambda) >> 8))
1217         return 1;
1218     return 0;
1219 }
1220
1221 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1222 {
1223     AVPacket pkt = { 0 };
1224     int ret, got_output;
1225
1226     av_init_packet(&pkt);
1227     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1228     if (ret < 0)
1229         return ret;
1230
1231     ret = pkt.size;
1232     av_packet_unref(&pkt);
1233     return ret;
1234 }
1235
1236 static int estimate_best_b_count(MpegEncContext *s)
1237 {
1238     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1239     AVCodecContext *c = avcodec_alloc_context3(NULL);
1240     const int scale = s->brd_scale;
1241     int i, j, out_size, p_lambda, b_lambda, lambda2;
1242     int64_t best_rd  = INT64_MAX;
1243     int best_b_count = -1;
1244
1245     if (!c)
1246         return AVERROR(ENOMEM);
1247     assert(scale >= 0 && scale <= 3);
1248
1249     //emms_c();
1250     //s->next_picture_ptr->quality;
1251     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1252     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1253     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1254     if (!b_lambda) // FIXME we should do this somewhere else
1255         b_lambda = p_lambda;
1256     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1257                FF_LAMBDA_SHIFT;
1258
1259     c->width        = s->width  >> scale;
1260     c->height       = s->height >> scale;
1261     c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1262     c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1263     c->mb_decision  = s->avctx->mb_decision;
1264     c->me_cmp       = s->avctx->me_cmp;
1265     c->mb_cmp       = s->avctx->mb_cmp;
1266     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1267     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1268     c->time_base    = s->avctx->time_base;
1269     c->max_b_frames = s->max_b_frames;
1270
1271     if (avcodec_open2(c, codec, NULL) < 0)
1272         return -1;
1273
1274     for (i = 0; i < s->max_b_frames + 2; i++) {
1275         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1276                                                 s->next_picture_ptr;
1277
1278         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1279             pre_input = *pre_input_ptr;
1280
1281             if (!pre_input.shared && i) {
1282                 pre_input.f->data[0] += INPLACE_OFFSET;
1283                 pre_input.f->data[1] += INPLACE_OFFSET;
1284                 pre_input.f->data[2] += INPLACE_OFFSET;
1285             }
1286
1287             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1288                                        s->tmp_frames[i]->linesize[0],
1289                                        pre_input.f->data[0],
1290                                        pre_input.f->linesize[0],
1291                                        c->width, c->height);
1292             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1293                                        s->tmp_frames[i]->linesize[1],
1294                                        pre_input.f->data[1],
1295                                        pre_input.f->linesize[1],
1296                                        c->width >> 1, c->height >> 1);
1297             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1298                                        s->tmp_frames[i]->linesize[2],
1299                                        pre_input.f->data[2],
1300                                        pre_input.f->linesize[2],
1301                                        c->width >> 1, c->height >> 1);
1302         }
1303     }
1304
1305     for (j = 0; j < s->max_b_frames + 1; j++) {
1306         int64_t rd = 0;
1307
1308         if (!s->input_picture[j])
1309             break;
1310
1311         c->error[0] = c->error[1] = c->error[2] = 0;
1312
1313         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1314         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1315
1316         out_size = encode_frame(c, s->tmp_frames[0]);
1317
1318         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1319
1320         for (i = 0; i < s->max_b_frames + 1; i++) {
1321             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1322
1323             s->tmp_frames[i + 1]->pict_type = is_p ?
1324                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1325             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1326
1327             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1328
1329             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1330         }
1331
1332         /* get the delayed frames */
1333         while (out_size) {
1334             out_size = encode_frame(c, NULL);
1335             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1336         }
1337
1338         rd += c->error[0] + c->error[1] + c->error[2];
1339
1340         if (rd < best_rd) {
1341             best_rd = rd;
1342             best_b_count = j;
1343         }
1344     }
1345
1346     avcodec_free_context(&c);
1347
1348     return best_b_count;
1349 }
1350
1351 static int select_input_picture(MpegEncContext *s)
1352 {
1353     int i, ret;
1354
1355     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1356         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1357     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1358
1359     /* set next picture type & ordering */
1360     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1361         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1362             !s->next_picture_ptr || s->intra_only) {
1363             s->reordered_input_picture[0] = s->input_picture[0];
1364             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1365             s->reordered_input_picture[0]->f->coded_picture_number =
1366                 s->coded_picture_number++;
1367         } else {
1368             int b_frames = 0;
1369
1370             if (s->frame_skip_threshold || s->frame_skip_factor) {
1371                 if (s->picture_in_gop_number < s->gop_size &&
1372                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1373                     // FIXME check that the gop check above is +-1 correct
1374                     av_frame_unref(s->input_picture[0]->f);
1375
1376                     emms_c();
1377                     ff_vbv_update(s, 0);
1378
1379                     goto no_output_pic;
1380                 }
1381             }
1382
1383             if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1384                 for (i = 0; i < s->max_b_frames + 1; i++) {
1385                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1386
1387                     if (pict_num >= s->rc_context.num_entries)
1388                         break;
1389                     if (!s->input_picture[i]) {
1390                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1391                         break;
1392                     }
1393
1394                     s->input_picture[i]->f->pict_type =
1395                         s->rc_context.entry[pict_num].new_pict_type;
1396                 }
1397             }
1398
1399             if (s->b_frame_strategy == 0) {
1400                 b_frames = s->max_b_frames;
1401                 while (b_frames && !s->input_picture[b_frames])
1402                     b_frames--;
1403             } else if (s->b_frame_strategy == 1) {
1404                 for (i = 1; i < s->max_b_frames + 1; i++) {
1405                     if (s->input_picture[i] &&
1406                         s->input_picture[i]->b_frame_score == 0) {
1407                         s->input_picture[i]->b_frame_score =
1408                             get_intra_count(s,
1409                                             s->input_picture[i    ]->f->data[0],
1410                                             s->input_picture[i - 1]->f->data[0],
1411                                             s->linesize) + 1;
1412                     }
1413                 }
1414                 for (i = 0; i < s->max_b_frames + 1; i++) {
1415                     if (!s->input_picture[i] ||
1416                         s->input_picture[i]->b_frame_score - 1 >
1417                             s->mb_num / s->b_sensitivity)
1418                         break;
1419                 }
1420
1421                 b_frames = FFMAX(0, i - 1);
1422
1423                 /* reset scores */
1424                 for (i = 0; i < b_frames + 1; i++) {
1425                     s->input_picture[i]->b_frame_score = 0;
1426                 }
1427             } else if (s->b_frame_strategy == 2) {
1428                 b_frames = estimate_best_b_count(s);
1429             }
1430
1431             emms_c();
1432
1433             for (i = b_frames - 1; i >= 0; i--) {
1434                 int type = s->input_picture[i]->f->pict_type;
1435                 if (type && type != AV_PICTURE_TYPE_B)
1436                     b_frames = i;
1437             }
1438             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1439                 b_frames == s->max_b_frames) {
1440                 av_log(s->avctx, AV_LOG_ERROR,
1441                        "warning, too many B-frames in a row\n");
1442             }
1443
1444             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1445                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1446                     s->gop_size > s->picture_in_gop_number) {
1447                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1448                 } else {
1449                     if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1450                         b_frames = 0;
1451                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1452                 }
1453             }
1454
1455             if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1456                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1457                 b_frames--;
1458
1459             s->reordered_input_picture[0] = s->input_picture[b_frames];
1460             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1461                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1462             s->reordered_input_picture[0]->f->coded_picture_number =
1463                 s->coded_picture_number++;
1464             for (i = 0; i < b_frames; i++) {
1465                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1466                 s->reordered_input_picture[i + 1]->f->pict_type =
1467                     AV_PICTURE_TYPE_B;
1468                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1469                     s->coded_picture_number++;
1470             }
1471         }
1472     }
1473 no_output_pic:
1474     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1475
1476     if (s->reordered_input_picture[0]) {
1477         s->reordered_input_picture[0]->reference =
1478            s->reordered_input_picture[0]->f->pict_type !=
1479                AV_PICTURE_TYPE_B ? 3 : 0;
1480
1481         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1482             return ret;
1483
1484         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1485             // input is a shared pix, so we can't modify it -> allocate a new
1486             // one & ensure that the shared one is reuseable
1487
1488             Picture *pic;
1489             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1490             if (i < 0)
1491                 return i;
1492             pic = &s->picture[i];
1493
1494             pic->reference = s->reordered_input_picture[0]->reference;
1495             if (alloc_picture(s, pic, 0) < 0) {
1496                 return -1;
1497             }
1498
1499             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1500             if (ret < 0)
1501                 return ret;
1502
1503             /* mark us unused / free shared pic */
1504             av_frame_unref(s->reordered_input_picture[0]->f);
1505             s->reordered_input_picture[0]->shared = 0;
1506
1507             s->current_picture_ptr = pic;
1508         } else {
1509             // input is not a shared pix -> reuse buffer for current_pix
1510             s->current_picture_ptr = s->reordered_input_picture[0];
1511             for (i = 0; i < 4; i++) {
1512                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1513             }
1514         }
1515         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1516         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1517                                        s->current_picture_ptr)) < 0)
1518             return ret;
1519
1520         s->picture_number = s->new_picture.f->display_picture_number;
1521     }
1522     return 0;
1523 }
1524
1525 static void frame_end(MpegEncContext *s)
1526 {
1527     int i;
1528
1529     if (s->unrestricted_mv &&
1530         s->current_picture.reference &&
1531         !s->intra_only) {
1532         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1533         int hshift = desc->log2_chroma_w;
1534         int vshift = desc->log2_chroma_h;
1535         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1536                                 s->h_edge_pos, s->v_edge_pos,
1537                                 EDGE_WIDTH, EDGE_WIDTH,
1538                                 EDGE_TOP | EDGE_BOTTOM);
1539         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1540                                 s->h_edge_pos >> hshift,
1541                                 s->v_edge_pos >> vshift,
1542                                 EDGE_WIDTH >> hshift,
1543                                 EDGE_WIDTH >> vshift,
1544                                 EDGE_TOP | EDGE_BOTTOM);
1545         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1546                                 s->h_edge_pos >> hshift,
1547                                 s->v_edge_pos >> vshift,
1548                                 EDGE_WIDTH >> hshift,
1549                                 EDGE_WIDTH >> vshift,
1550                                 EDGE_TOP | EDGE_BOTTOM);
1551     }
1552
1553     emms_c();
1554
1555     s->last_pict_type                 = s->pict_type;
1556     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1557     if (s->pict_type!= AV_PICTURE_TYPE_B)
1558         s->last_non_b_pict_type = s->pict_type;
1559
1560     if (s->encoding) {
1561         /* release non-reference frames */
1562         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1563             if (!s->picture[i].reference)
1564                 ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1565         }
1566     }
1567
1568 #if FF_API_CODED_FRAME
1569 FF_DISABLE_DEPRECATION_WARNINGS
1570     av_frame_copy_props(s->avctx->coded_frame, s->current_picture.f);
1571 FF_ENABLE_DEPRECATION_WARNINGS
1572 #endif
1573 #if FF_API_ERROR_FRAME
1574 FF_DISABLE_DEPRECATION_WARNINGS
1575     memcpy(s->current_picture.f->error, s->current_picture.encoding_error,
1576            sizeof(s->current_picture.encoding_error));
1577 FF_ENABLE_DEPRECATION_WARNINGS
1578 #endif
1579 }
1580
1581 static void update_noise_reduction(MpegEncContext *s)
1582 {
1583     int intra, i;
1584
1585     for (intra = 0; intra < 2; intra++) {
1586         if (s->dct_count[intra] > (1 << 16)) {
1587             for (i = 0; i < 64; i++) {
1588                 s->dct_error_sum[intra][i] >>= 1;
1589             }
1590             s->dct_count[intra] >>= 1;
1591         }
1592
1593         for (i = 0; i < 64; i++) {
1594             s->dct_offset[intra][i] = (s->noise_reduction *
1595                                        s->dct_count[intra] +
1596                                        s->dct_error_sum[intra][i] / 2) /
1597                                       (s->dct_error_sum[intra][i] + 1);
1598         }
1599     }
1600 }
1601
1602 static int frame_start(MpegEncContext *s)
1603 {
1604     int ret;
1605
1606     /* mark & release old frames */
1607     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1608         s->last_picture_ptr != s->next_picture_ptr &&
1609         s->last_picture_ptr->f->buf[0]) {
1610         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1611     }
1612
1613     s->current_picture_ptr->f->pict_type = s->pict_type;
1614     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1615
1616     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1617     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1618                                    s->current_picture_ptr)) < 0)
1619         return ret;
1620
1621     if (s->pict_type != AV_PICTURE_TYPE_B) {
1622         s->last_picture_ptr = s->next_picture_ptr;
1623         if (!s->droppable)
1624             s->next_picture_ptr = s->current_picture_ptr;
1625     }
1626
1627     if (s->last_picture_ptr) {
1628         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1629         if (s->last_picture_ptr->f->buf[0] &&
1630             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1631                                        s->last_picture_ptr)) < 0)
1632             return ret;
1633     }
1634     if (s->next_picture_ptr) {
1635         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1636         if (s->next_picture_ptr->f->buf[0] &&
1637             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1638                                        s->next_picture_ptr)) < 0)
1639             return ret;
1640     }
1641
1642     if (s->picture_structure!= PICT_FRAME) {
1643         int i;
1644         for (i = 0; i < 4; i++) {
1645             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1646                 s->current_picture.f->data[i] +=
1647                     s->current_picture.f->linesize[i];
1648             }
1649             s->current_picture.f->linesize[i] *= 2;
1650             s->last_picture.f->linesize[i]    *= 2;
1651             s->next_picture.f->linesize[i]    *= 2;
1652         }
1653     }
1654
1655     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1656         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1657         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1658     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1659         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1660         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1661     } else {
1662         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1663         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1664     }
1665
1666     if (s->dct_error_sum) {
1667         assert(s->noise_reduction && s->encoding);
1668         update_noise_reduction(s);
1669     }
1670
1671     return 0;
1672 }
1673
1674 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1675                           const AVFrame *pic_arg, int *got_packet)
1676 {
1677     MpegEncContext *s = avctx->priv_data;
1678     int i, stuffing_count, ret;
1679     int context_count = s->slice_context_count;
1680
1681     s->picture_in_gop_number++;
1682
1683     if (load_input_picture(s, pic_arg) < 0)
1684         return -1;
1685
1686     if (select_input_picture(s) < 0) {
1687         return -1;
1688     }
1689
1690     /* output? */
1691     if (s->new_picture.f->data[0]) {
1692         uint8_t *sd;
1693         if (!pkt->data &&
1694             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1695             return ret;
1696         if (s->mb_info) {
1697             s->mb_info_ptr = av_packet_new_side_data(pkt,
1698                                  AV_PKT_DATA_H263_MB_INFO,
1699                                  s->mb_width*s->mb_height*12);
1700             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1701         }
1702
1703         for (i = 0; i < context_count; i++) {
1704             int start_y = s->thread_context[i]->start_mb_y;
1705             int   end_y = s->thread_context[i]->  end_mb_y;
1706             int h       = s->mb_height;
1707             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1708             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1709
1710             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1711         }
1712
1713         s->pict_type = s->new_picture.f->pict_type;
1714         //emms_c();
1715         ret = frame_start(s);
1716         if (ret < 0)
1717             return ret;
1718 vbv_retry:
1719         if (encode_picture(s, s->picture_number) < 0)
1720             return -1;
1721
1722 #if FF_API_STAT_BITS
1723 FF_DISABLE_DEPRECATION_WARNINGS
1724         avctx->header_bits = s->header_bits;
1725         avctx->mv_bits     = s->mv_bits;
1726         avctx->misc_bits   = s->misc_bits;
1727         avctx->i_tex_bits  = s->i_tex_bits;
1728         avctx->p_tex_bits  = s->p_tex_bits;
1729         avctx->i_count     = s->i_count;
1730         // FIXME f/b_count in avctx
1731         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1732         avctx->skip_count  = s->skip_count;
1733 FF_ENABLE_DEPRECATION_WARNINGS
1734 #endif
1735
1736         frame_end(s);
1737
1738         sd = av_packet_new_side_data(pkt, AV_PKT_DATA_QUALITY_FACTOR,
1739                                      sizeof(int));
1740         if (!sd)
1741             return AVERROR(ENOMEM);
1742         *(int *)sd = s->current_picture.f->quality;
1743
1744         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1745             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1746
1747         if (avctx->rc_buffer_size) {
1748             RateControlContext *rcc = &s->rc_context;
1749             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1750
1751             if (put_bits_count(&s->pb) > max_size &&
1752                 s->lambda < s->lmax) {
1753                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1754                                        (s->qscale + 1) / s->qscale);
1755                 if (s->adaptive_quant) {
1756                     int i;
1757                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1758                         s->lambda_table[i] =
1759                             FFMAX(s->lambda_table[i] + 1,
1760                                   s->lambda_table[i] * (s->qscale + 1) /
1761                                   s->qscale);
1762                 }
1763                 s->mb_skipped = 0;        // done in frame_start()
1764                 // done in encode_picture() so we must undo it
1765                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1766                     if (s->flipflop_rounding          ||
1767                         s->codec_id == AV_CODEC_ID_H263P ||
1768                         s->codec_id == AV_CODEC_ID_MPEG4)
1769                         s->no_rounding ^= 1;
1770                 }
1771                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1772                     s->time_base       = s->last_time_base;
1773                     s->last_non_b_time = s->time - s->pp_time;
1774                 }
1775                 for (i = 0; i < context_count; i++) {
1776                     PutBitContext *pb = &s->thread_context[i]->pb;
1777                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1778                 }
1779                 goto vbv_retry;
1780             }
1781
1782             assert(s->avctx->rc_max_rate);
1783         }
1784
1785         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1786             ff_write_pass1_stats(s);
1787
1788         for (i = 0; i < 4; i++) {
1789             s->current_picture_ptr->encoding_error[i] = s->current_picture.encoding_error[i];
1790             avctx->error[i] += s->current_picture_ptr->encoding_error[i];
1791         }
1792
1793         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1794             assert(put_bits_count(&s->pb) == s->header_bits + s->mv_bits +
1795                                              s->misc_bits + s->i_tex_bits +
1796                                              s->p_tex_bits);
1797         flush_put_bits(&s->pb);
1798         s->frame_bits  = put_bits_count(&s->pb);
1799
1800         stuffing_count = ff_vbv_update(s, s->frame_bits);
1801         if (stuffing_count) {
1802             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1803                     stuffing_count + 50) {
1804                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1805                 return -1;
1806             }
1807
1808             switch (s->codec_id) {
1809             case AV_CODEC_ID_MPEG1VIDEO:
1810             case AV_CODEC_ID_MPEG2VIDEO:
1811                 while (stuffing_count--) {
1812                     put_bits(&s->pb, 8, 0);
1813                 }
1814             break;
1815             case AV_CODEC_ID_MPEG4:
1816                 put_bits(&s->pb, 16, 0);
1817                 put_bits(&s->pb, 16, 0x1C3);
1818                 stuffing_count -= 4;
1819                 while (stuffing_count--) {
1820                     put_bits(&s->pb, 8, 0xFF);
1821                 }
1822             break;
1823             default:
1824                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1825             }
1826             flush_put_bits(&s->pb);
1827             s->frame_bits  = put_bits_count(&s->pb);
1828         }
1829
1830         /* update MPEG-1/2 vbv_delay for CBR */
1831         if (s->avctx->rc_max_rate                          &&
1832             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1833             s->out_format == FMT_MPEG1                     &&
1834             90000LL * (avctx->rc_buffer_size - 1) <=
1835                 s->avctx->rc_max_rate * 0xFFFFLL) {
1836             AVCPBProperties *props;
1837             size_t props_size;
1838
1839             int vbv_delay, min_delay;
1840             double inbits  = s->avctx->rc_max_rate *
1841                              av_q2d(s->avctx->time_base);
1842             int    minbits = s->frame_bits - 8 *
1843                              (s->vbv_delay_ptr - s->pb.buf - 1);
1844             double bits    = s->rc_context.buffer_index + minbits - inbits;
1845
1846             if (bits < 0)
1847                 av_log(s->avctx, AV_LOG_ERROR,
1848                        "Internal error, negative bits\n");
1849
1850             assert(s->repeat_first_field == 0);
1851
1852             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1853             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1854                         s->avctx->rc_max_rate;
1855
1856             vbv_delay = FFMAX(vbv_delay, min_delay);
1857
1858             assert(vbv_delay < 0xFFFF);
1859
1860             s->vbv_delay_ptr[0] &= 0xF8;
1861             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1862             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1863             s->vbv_delay_ptr[2] &= 0x07;
1864             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1865
1866             props = av_cpb_properties_alloc(&props_size);
1867             if (!props)
1868                 return AVERROR(ENOMEM);
1869             props->vbv_delay = vbv_delay * 300;
1870
1871             ret = av_packet_add_side_data(pkt, AV_PKT_DATA_CPB_PROPERTIES,
1872                                           (uint8_t*)props, props_size);
1873             if (ret < 0) {
1874                 av_freep(&props);
1875                 return ret;
1876             }
1877
1878 #if FF_API_VBV_DELAY
1879 FF_DISABLE_DEPRECATION_WARNINGS
1880             avctx->vbv_delay     = vbv_delay * 300;
1881 FF_ENABLE_DEPRECATION_WARNINGS
1882 #endif
1883         }
1884         s->total_bits     += s->frame_bits;
1885 #if FF_API_STAT_BITS
1886 FF_DISABLE_DEPRECATION_WARNINGS
1887         avctx->frame_bits  = s->frame_bits;
1888 FF_ENABLE_DEPRECATION_WARNINGS
1889 #endif
1890
1891
1892         pkt->pts = s->current_picture.f->pts;
1893         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1894             if (!s->current_picture.f->coded_picture_number)
1895                 pkt->dts = pkt->pts - s->dts_delta;
1896             else
1897                 pkt->dts = s->reordered_pts;
1898             s->reordered_pts = pkt->pts;
1899         } else
1900             pkt->dts = pkt->pts;
1901         if (s->current_picture.f->key_frame)
1902             pkt->flags |= AV_PKT_FLAG_KEY;
1903         if (s->mb_info)
1904             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1905     } else {
1906         s->frame_bits = 0;
1907     }
1908     assert((s->frame_bits & 7) == 0);
1909
1910     pkt->size = s->frame_bits / 8;
1911     *got_packet = !!pkt->size;
1912     return 0;
1913 }
1914
1915 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1916                                                 int n, int threshold)
1917 {
1918     static const char tab[64] = {
1919         3, 2, 2, 1, 1, 1, 1, 1,
1920         1, 1, 1, 1, 1, 1, 1, 1,
1921         1, 1, 1, 1, 1, 1, 1, 1,
1922         0, 0, 0, 0, 0, 0, 0, 0,
1923         0, 0, 0, 0, 0, 0, 0, 0,
1924         0, 0, 0, 0, 0, 0, 0, 0,
1925         0, 0, 0, 0, 0, 0, 0, 0,
1926         0, 0, 0, 0, 0, 0, 0, 0
1927     };
1928     int score = 0;
1929     int run = 0;
1930     int i;
1931     int16_t *block = s->block[n];
1932     const int last_index = s->block_last_index[n];
1933     int skip_dc;
1934
1935     if (threshold < 0) {
1936         skip_dc = 0;
1937         threshold = -threshold;
1938     } else
1939         skip_dc = 1;
1940
1941     /* Are all we could set to zero already zero? */
1942     if (last_index <= skip_dc - 1)
1943         return;
1944
1945     for (i = 0; i <= last_index; i++) {
1946         const int j = s->intra_scantable.permutated[i];
1947         const int level = FFABS(block[j]);
1948         if (level == 1) {
1949             if (skip_dc && i == 0)
1950                 continue;
1951             score += tab[run];
1952             run = 0;
1953         } else if (level > 1) {
1954             return;
1955         } else {
1956             run++;
1957         }
1958     }
1959     if (score >= threshold)
1960         return;
1961     for (i = skip_dc; i <= last_index; i++) {
1962         const int j = s->intra_scantable.permutated[i];
1963         block[j] = 0;
1964     }
1965     if (block[0])
1966         s->block_last_index[n] = 0;
1967     else
1968         s->block_last_index[n] = -1;
1969 }
1970
1971 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1972                                int last_index)
1973 {
1974     int i;
1975     const int maxlevel = s->max_qcoeff;
1976     const int minlevel = s->min_qcoeff;
1977     int overflow = 0;
1978
1979     if (s->mb_intra) {
1980         i = 1; // skip clipping of intra dc
1981     } else
1982         i = 0;
1983
1984     for (; i <= last_index; i++) {
1985         const int j = s->intra_scantable.permutated[i];
1986         int level = block[j];
1987
1988         if (level > maxlevel) {
1989             level = maxlevel;
1990             overflow++;
1991         } else if (level < minlevel) {
1992             level = minlevel;
1993             overflow++;
1994         }
1995
1996         block[j] = level;
1997     }
1998
1999     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2000         av_log(s->avctx, AV_LOG_INFO,
2001                "warning, clipping %d dct coefficients to %d..%d\n",
2002                overflow, minlevel, maxlevel);
2003 }
2004
/**
 * Compute a weight for each pixel of an 8x8 block from the local activity
 * around it.
 *
 * For every pixel the up to 3x3 neighbourhood (cropped at the borders of
 * the 8x8 block) is examined and the weight is set to
 * 36 * sqrt(count * sum_of_squares - sum * sum) / count, where count is
 * the number of neighbourhood samples.
 *
 * @param weight output, 64 entries stored row by row
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            const int count = (bottom - top) * (right - left);
            int sum = 0;
            int sqr = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    int v = line[nx];
                    sum += v;
                    sqr += v * v;
                }
            }
            weight[col + 8 * row] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
2028
2029 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2030                                                 int motion_x, int motion_y,
2031                                                 int mb_block_height,
2032                                                 int mb_block_count)
2033 {
2034     int16_t weight[8][64];
2035     int16_t orig[8][64];
2036     const int mb_x = s->mb_x;
2037     const int mb_y = s->mb_y;
2038     int i;
2039     int skip_dct[8];
2040     int dct_offset = s->linesize * 8; // default for progressive frames
2041     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2042     ptrdiff_t wrap_y, wrap_c;
2043
2044     for (i = 0; i < mb_block_count; i++)
2045         skip_dct[i] = s->skipdct;
2046
2047     if (s->adaptive_quant) {
2048         const int last_qp = s->qscale;
2049         const int mb_xy = mb_x + mb_y * s->mb_stride;
2050
2051         s->lambda = s->lambda_table[mb_xy];
2052         update_qscale(s);
2053
2054         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2055             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2056             s->dquant = s->qscale - last_qp;
2057
2058             if (s->out_format == FMT_H263) {
2059                 s->dquant = av_clip(s->dquant, -2, 2);
2060
2061                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2062                     if (!s->mb_intra) {
2063                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2064                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2065                                 s->dquant = 0;
2066                         }
2067                         if (s->mv_type == MV_TYPE_8X8)
2068                             s->dquant = 0;
2069                     }
2070                 }
2071             }
2072         }
2073         ff_set_qscale(s, last_qp + s->dquant);
2074     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2075         ff_set_qscale(s, s->qscale + s->dquant);
2076
2077     wrap_y = s->linesize;
2078     wrap_c = s->uvlinesize;
2079     ptr_y  = s->new_picture.f->data[0] +
2080              (mb_y * 16 * wrap_y)              + mb_x * 16;
2081     ptr_cb = s->new_picture.f->data[1] +
2082              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
2083     ptr_cr = s->new_picture.f->data[2] +
2084              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
2085
2086     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
2087         uint8_t *ebuf = s->sc.edge_emu_buffer + 32;
2088         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2089                                  wrap_y, wrap_y,
2090                                  16, 16, mb_x * 16, mb_y * 16,
2091                                  s->width, s->height);
2092         ptr_y = ebuf;
2093         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
2094                                  wrap_c, wrap_c,
2095                                  8, mb_block_height, mb_x * 8, mb_y * 8,
2096                                  s->width >> 1, s->height >> 1);
2097         ptr_cb = ebuf + 18 * wrap_y;
2098         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
2099                                  wrap_c, wrap_c,
2100                                  8, mb_block_height, mb_x * 8, mb_y * 8,
2101                                  s->width >> 1, s->height >> 1);
2102         ptr_cr = ebuf + 18 * wrap_y + 8;
2103     }
2104
2105     if (s->mb_intra) {
2106         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2107             int progressive_score, interlaced_score;
2108
2109             s->interlaced_dct = 0;
2110             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2111                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2112                                                      NULL, wrap_y, 8) - 400;
2113
2114             if (progressive_score > 0) {
2115                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2116                                                         NULL, wrap_y * 2, 8) +
2117                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2118                                                         NULL, wrap_y * 2, 8);
2119                 if (progressive_score > interlaced_score) {
2120                     s->interlaced_dct = 1;
2121
2122                     dct_offset = wrap_y;
2123                     wrap_y <<= 1;
2124                     if (s->chroma_format == CHROMA_422)
2125                         wrap_c <<= 1;
2126                 }
2127             }
2128         }
2129
2130         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2131         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2132         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2133         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2134
2135         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2136             skip_dct[4] = 1;
2137             skip_dct[5] = 1;
2138         } else {
2139             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2140             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2141             if (!s->chroma_y_shift) { /* 422 */
2142                 s->pdsp.get_pixels(s->block[6],
2143                                    ptr_cb + (dct_offset >> 1), wrap_c);
2144                 s->pdsp.get_pixels(s->block[7],
2145                                    ptr_cr + (dct_offset >> 1), wrap_c);
2146             }
2147         }
2148     } else {
2149         op_pixels_func (*op_pix)[4];
2150         qpel_mc_func (*op_qpix)[16];
2151         uint8_t *dest_y, *dest_cb, *dest_cr;
2152
2153         dest_y  = s->dest[0];
2154         dest_cb = s->dest[1];
2155         dest_cr = s->dest[2];
2156
2157         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2158             op_pix  = s->hdsp.put_pixels_tab;
2159             op_qpix = s->qdsp.put_qpel_pixels_tab;
2160         } else {
2161             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2162             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2163         }
2164
2165         if (s->mv_dir & MV_DIR_FORWARD) {
2166             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2167                           s->last_picture.f->data,
2168                           op_pix, op_qpix);
2169             op_pix  = s->hdsp.avg_pixels_tab;
2170             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2171         }
2172         if (s->mv_dir & MV_DIR_BACKWARD) {
2173             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2174                           s->next_picture.f->data,
2175                           op_pix, op_qpix);
2176         }
2177
2178         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2179             int progressive_score, interlaced_score;
2180
2181             s->interlaced_dct = 0;
2182             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2183                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2184                                                      ptr_y + wrap_y * 8,
2185                                                      wrap_y, 8) - 400;
2186
2187             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2188                 progressive_score -= 400;
2189
2190             if (progressive_score > 0) {
2191                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2192                                                         wrap_y * 2, 8) +
2193                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2194                                                         ptr_y + wrap_y,
2195                                                         wrap_y * 2, 8);
2196
2197                 if (progressive_score > interlaced_score) {
2198                     s->interlaced_dct = 1;
2199
2200                     dct_offset = wrap_y;
2201                     wrap_y <<= 1;
2202                     if (s->chroma_format == CHROMA_422)
2203                         wrap_c <<= 1;
2204                 }
2205             }
2206         }
2207
2208         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2209         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2210         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2211                             dest_y + dct_offset, wrap_y);
2212         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2213                             dest_y + dct_offset + 8, wrap_y);
2214
2215         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2216             skip_dct[4] = 1;
2217             skip_dct[5] = 1;
2218         } else {
2219             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2220             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2221             if (!s->chroma_y_shift) { /* 422 */
2222                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2223                                     dest_cb + (dct_offset >> 1), wrap_c);
2224                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2225                                     dest_cr + (dct_offset >> 1), wrap_c);
2226             }
2227         }
2228         /* pre quantization */
2229         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2230                 2 * s->qscale * s->qscale) {
2231             // FIXME optimize
2232             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2233                 skip_dct[0] = 1;
2234             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2235                 skip_dct[1] = 1;
2236             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2237                                wrap_y, 8) < 20 * s->qscale)
2238                 skip_dct[2] = 1;
2239             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2240                                wrap_y, 8) < 20 * s->qscale)
2241                 skip_dct[3] = 1;
2242             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2243                 skip_dct[4] = 1;
2244             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2245                 skip_dct[5] = 1;
2246             if (!s->chroma_y_shift) { /* 422 */
2247                 if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2248                                    dest_cb + (dct_offset >> 1),
2249                                    wrap_c, 8) < 20 * s->qscale)
2250                     skip_dct[6] = 1;
2251                 if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2252                                    dest_cr + (dct_offset >> 1),
2253                                    wrap_c, 8) < 20 * s->qscale)
2254                     skip_dct[7] = 1;
2255             }
2256         }
2257     }
2258
2259     if (s->quantizer_noise_shaping) {
2260         if (!skip_dct[0])
2261             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2262         if (!skip_dct[1])
2263             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2264         if (!skip_dct[2])
2265             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2266         if (!skip_dct[3])
2267             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2268         if (!skip_dct[4])
2269             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2270         if (!skip_dct[5])
2271             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2272         if (!s->chroma_y_shift) { /* 422 */
2273             if (!skip_dct[6])
2274                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2275                                   wrap_c);
2276             if (!skip_dct[7])
2277                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2278                                   wrap_c);
2279         }
2280         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2281     }
2282
2283     /* DCT & quantize */
2284     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2285     {
2286         for (i = 0; i < mb_block_count; i++) {
2287             if (!skip_dct[i]) {
2288                 int overflow;
2289                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2290                 // FIXME we could decide to change to quantizer instead of
2291                 // clipping
2292                 // JS: I don't think that would be a good idea it could lower
2293                 //     quality instead of improve it. Just INTRADC clipping
2294                 //     deserves changes in quantizer
2295                 if (overflow)
2296                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2297             } else
2298                 s->block_last_index[i] = -1;
2299         }
2300         if (s->quantizer_noise_shaping) {
2301             for (i = 0; i < mb_block_count; i++) {
2302                 if (!skip_dct[i]) {
2303                     s->block_last_index[i] =
2304                         dct_quantize_refine(s, s->block[i], weight[i],
2305                                             orig[i], i, s->qscale);
2306                 }
2307             }
2308         }
2309
2310         if (s->luma_elim_threshold && !s->mb_intra)
2311             for (i = 0; i < 4; i++)
2312                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2313         if (s->chroma_elim_threshold && !s->mb_intra)
2314             for (i = 4; i < mb_block_count; i++)
2315                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2316
2317         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2318             for (i = 0; i < mb_block_count; i++) {
2319                 if (s->block_last_index[i] == -1)
2320                     s->coded_score[i] = INT_MAX / 256;
2321             }
2322         }
2323     }
2324
2325     if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2326         s->block_last_index[4] =
2327         s->block_last_index[5] = 0;
2328         s->block[4][0] =
2329         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2330     }
2331
2332     // non c quantize code returns incorrect block_last_index FIXME
2333     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2334         for (i = 0; i < mb_block_count; i++) {
2335             int j;
2336             if (s->block_last_index[i] > 0) {
2337                 for (j = 63; j > 0; j--) {
2338                     if (s->block[i][s->intra_scantable.permutated[j]])
2339                         break;
2340                 }
2341                 s->block_last_index[i] = j;
2342             }
2343         }
2344     }
2345
2346     /* huffman encode */
2347     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2348     case AV_CODEC_ID_MPEG1VIDEO:
2349     case AV_CODEC_ID_MPEG2VIDEO:
2350         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2351             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2352         break;
2353     case AV_CODEC_ID_MPEG4:
2354         if (CONFIG_MPEG4_ENCODER)
2355             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2356         break;
2357     case AV_CODEC_ID_MSMPEG4V2:
2358     case AV_CODEC_ID_MSMPEG4V3:
2359     case AV_CODEC_ID_WMV1:
2360         if (CONFIG_MSMPEG4_ENCODER)
2361             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2362         break;
2363     case AV_CODEC_ID_WMV2:
2364         if (CONFIG_WMV2_ENCODER)
2365             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2366         break;
2367     case AV_CODEC_ID_H261:
2368         if (CONFIG_H261_ENCODER)
2369             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2370         break;
2371     case AV_CODEC_ID_H263:
2372     case AV_CODEC_ID_H263P:
2373     case AV_CODEC_ID_FLV1:
2374     case AV_CODEC_ID_RV10:
2375     case AV_CODEC_ID_RV20:
2376         if (CONFIG_H263_ENCODER)
2377             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2378         break;
2379     case AV_CODEC_ID_MJPEG:
2380         if (CONFIG_MJPEG_ENCODER)
2381             ff_mjpeg_encode_mb(s, s->block);
2382         break;
2383     default:
2384         assert(0);
2385     }
2386 }
2387
2388 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2389 {
2390     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2391     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2392 }
2393
2394 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2395     int i;
2396
2397     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2398
2399     /* MPEG-1 */
2400     d->mb_skip_run= s->mb_skip_run;
2401     for(i=0; i<3; i++)
2402         d->last_dc[i] = s->last_dc[i];
2403
2404     /* statistics */
2405     d->mv_bits= s->mv_bits;
2406     d->i_tex_bits= s->i_tex_bits;
2407     d->p_tex_bits= s->p_tex_bits;
2408     d->i_count= s->i_count;
2409     d->f_count= s->f_count;
2410     d->b_count= s->b_count;
2411     d->skip_count= s->skip_count;
2412     d->misc_bits= s->misc_bits;
2413     d->last_bits= 0;
2414
2415     d->mb_skipped= 0;
2416     d->qscale= s->qscale;
2417     d->dquant= s->dquant;
2418
2419     d->esc3_level_length= s->esc3_level_length;
2420 }
2421
2422 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2423     int i;
2424
2425     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2426     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2427
2428     /* MPEG-1 */
2429     d->mb_skip_run= s->mb_skip_run;
2430     for(i=0; i<3; i++)
2431         d->last_dc[i] = s->last_dc[i];
2432
2433     /* statistics */
2434     d->mv_bits= s->mv_bits;
2435     d->i_tex_bits= s->i_tex_bits;
2436     d->p_tex_bits= s->p_tex_bits;
2437     d->i_count= s->i_count;
2438     d->f_count= s->f_count;
2439     d->b_count= s->b_count;
2440     d->skip_count= s->skip_count;
2441     d->misc_bits= s->misc_bits;
2442
2443     d->mb_intra= s->mb_intra;
2444     d->mb_skipped= s->mb_skipped;
2445     d->mv_type= s->mv_type;
2446     d->mv_dir= s->mv_dir;
2447     d->pb= s->pb;
2448     if(s->data_partitioning){
2449         d->pb2= s->pb2;
2450         d->tex_pb= s->tex_pb;
2451     }
2452     d->block= s->block;
2453     for(i=0; i<8; i++)
2454         d->block_last_index[i]= s->block_last_index[i];
2455     d->interlaced_dct= s->interlaced_dct;
2456     d->qscale= s->qscale;
2457
2458     d->esc3_level_length= s->esc3_level_length;
2459 }
2460
2461 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2462                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2463                            int *dmin, int *next_block, int motion_x, int motion_y)
2464 {
2465     int score;
2466     uint8_t *dest_backup[3];
2467
2468     copy_context_before_encode(s, backup, type);
2469
2470     s->block= s->blocks[*next_block];
2471     s->pb= pb[*next_block];
2472     if(s->data_partitioning){
2473         s->pb2   = pb2   [*next_block];
2474         s->tex_pb= tex_pb[*next_block];
2475     }
2476
2477     if(*next_block){
2478         memcpy(dest_backup, s->dest, sizeof(s->dest));
2479         s->dest[0] = s->sc.rd_scratchpad;
2480         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2481         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2482         assert(s->linesize >= 32); //FIXME
2483     }
2484
2485     encode_mb(s, motion_x, motion_y);
2486
2487     score= put_bits_count(&s->pb);
2488     if(s->data_partitioning){
2489         score+= put_bits_count(&s->pb2);
2490         score+= put_bits_count(&s->tex_pb);
2491     }
2492
2493     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2494         ff_mpv_decode_mb(s, s->block);
2495
2496         score *= s->lambda2;
2497         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2498     }
2499
2500     if(*next_block){
2501         memcpy(s->dest, dest_backup, sizeof(s->dest));
2502     }
2503
2504     if(score<*dmin){
2505         *dmin= score;
2506         *next_block^=1;
2507
2508         copy_context_after_encode(best, s, type);
2509     }
2510 }
2511
2512 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2513     uint32_t *sq = ff_square_tab + 256;
2514     int acc=0;
2515     int x,y;
2516
2517     if(w==16 && h==16)
2518         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2519     else if(w==8 && h==8)
2520         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2521
2522     for(y=0; y<h; y++){
2523         for(x=0; x<w; x++){
2524             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2525         }
2526     }
2527
2528     assert(acc>=0);
2529
2530     return acc;
2531 }
2532
2533 static int sse_mb(MpegEncContext *s){
2534     int w= 16;
2535     int h= 16;
2536
2537     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2538     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2539
2540     if(w==16 && h==16)
2541       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2542         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2543                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2544                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2545       }else{
2546         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2547                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2548                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2549       }
2550     else
2551         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2552                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2553                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2554 }
2555
2556 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2557     MpegEncContext *s= *(void**)arg;
2558
2559
2560     s->me.pre_pass=1;
2561     s->me.dia_size= s->avctx->pre_dia_size;
2562     s->first_slice_line=1;
2563     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2564         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2565             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2566         }
2567         s->first_slice_line=0;
2568     }
2569
2570     s->me.pre_pass=0;
2571
2572     return 0;
2573 }
2574
2575 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2576     MpegEncContext *s= *(void**)arg;
2577
2578     s->me.dia_size= s->avctx->dia_size;
2579     s->first_slice_line=1;
2580     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2581         s->mb_x=0; //for block init below
2582         ff_init_block_index(s);
2583         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2584             s->block_index[0]+=2;
2585             s->block_index[1]+=2;
2586             s->block_index[2]+=2;
2587             s->block_index[3]+=2;
2588
2589             /* compute motion vector & mb_type and store in context */
2590             if(s->pict_type==AV_PICTURE_TYPE_B)
2591                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2592             else
2593                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2594         }
2595         s->first_slice_line=0;
2596     }
2597     return 0;
2598 }
2599
2600 static int mb_var_thread(AVCodecContext *c, void *arg){
2601     MpegEncContext *s= *(void**)arg;
2602     int mb_x, mb_y;
2603
2604     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2605         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2606             int xx = mb_x * 16;
2607             int yy = mb_y * 16;
2608             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2609             int varc;
2610             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2611
2612             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2613                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2614
2615             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2616             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2617             s->me.mb_var_sum_temp    += varc;
2618         }
2619     }
2620     return 0;
2621 }
2622
2623 static void write_slice_end(MpegEncContext *s){
2624     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2625         if(s->partitioned_frame){
2626             ff_mpeg4_merge_partitions(s);
2627         }
2628
2629         ff_mpeg4_stuffing(&s->pb);
2630     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2631         ff_mjpeg_encode_stuffing(&s->pb);
2632     }
2633
2634     avpriv_align_put_bits(&s->pb);
2635     flush_put_bits(&s->pb);
2636
2637     if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2638         s->misc_bits+= get_bits_diff(s);
2639 }
2640
2641 static void write_mb_info(MpegEncContext *s)
2642 {
2643     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2644     int offset = put_bits_count(&s->pb);
2645     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2646     int gobn = s->mb_y / s->gob_index;
2647     int pred_x, pred_y;
2648     if (CONFIG_H263_ENCODER)
2649         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2650     bytestream_put_le32(&ptr, offset);
2651     bytestream_put_byte(&ptr, s->qscale);
2652     bytestream_put_byte(&ptr, gobn);
2653     bytestream_put_le16(&ptr, mba);
2654     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2655     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2656     /* 4MV not implemented */
2657     bytestream_put_byte(&ptr, 0); /* hmv2 */
2658     bytestream_put_byte(&ptr, 0); /* vmv2 */
2659 }
2660
2661 static void update_mb_info(MpegEncContext *s, int startcode)
2662 {
2663     if (!s->mb_info)
2664         return;
2665     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2666         s->mb_info_size += 12;
2667         s->prev_mb_info = s->last_mb_info;
2668     }
2669     if (startcode) {
2670         s->prev_mb_info = put_bits_count(&s->pb)/8;
2671         /* This might have incremented mb_info_size above, and we return without
2672          * actually writing any info into that slot yet. But in that case,
2673          * this will be called again at the start of the after writing the
2674          * start code, actually writing the mb info. */
2675         return;
2676     }
2677
2678     s->last_mb_info = put_bits_count(&s->pb)/8;
2679     if (!s->mb_info_size)
2680         s->mb_info_size += 12;
2681     write_mb_info(s);
2682 }
2683
2684 static int encode_thread(AVCodecContext *c, void *arg){
2685     MpegEncContext *s= *(void**)arg;
2686     int mb_x, mb_y;
2687     int chr_h= 16>>s->chroma_y_shift;
2688     int i, j;
2689     MpegEncContext best_s = { 0 }, backup_s;
2690     uint8_t bit_buf[2][MAX_MB_BYTES];
2691     uint8_t bit_buf2[2][MAX_MB_BYTES];
2692     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2693     PutBitContext pb[2], pb2[2], tex_pb[2];
2694
2695     for(i=0; i<2; i++){
2696         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2697         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2698         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2699     }
2700
2701     s->last_bits= put_bits_count(&s->pb);
2702     s->mv_bits=0;
2703     s->misc_bits=0;
2704     s->i_tex_bits=0;
2705     s->p_tex_bits=0;
2706     s->i_count=0;
2707     s->f_count=0;
2708     s->b_count=0;
2709     s->skip_count=0;
2710
2711     for(i=0; i<3; i++){
2712         /* init last dc values */
2713         /* note: quant matrix value (8) is implied here */
2714         s->last_dc[i] = 128 << s->intra_dc_precision;
2715
2716         s->current_picture.encoding_error[i] = 0;
2717     }
2718     s->mb_skip_run = 0;
2719     memset(s->last_mv, 0, sizeof(s->last_mv));
2720
2721     s->last_mv_dir = 0;
2722
2723     switch(s->codec_id){
2724     case AV_CODEC_ID_H263:
2725     case AV_CODEC_ID_H263P:
2726     case AV_CODEC_ID_FLV1:
2727         if (CONFIG_H263_ENCODER)
2728             s->gob_index = H263_GOB_HEIGHT(s->height);
2729         break;
2730     case AV_CODEC_ID_MPEG4:
2731         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2732             ff_mpeg4_init_partitions(s);
2733         break;
2734     }
2735
2736     s->resync_mb_x=0;
2737     s->resync_mb_y=0;
2738     s->first_slice_line = 1;
2739     s->ptr_lastgob = s->pb.buf;
2740     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2741         s->mb_x=0;
2742         s->mb_y= mb_y;
2743
2744         ff_set_qscale(s, s->qscale);
2745         ff_init_block_index(s);
2746
2747         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2748             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2749             int mb_type= s->mb_type[xy];
2750 //            int d;
2751             int dmin= INT_MAX;
2752             int dir;
2753
2754             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2755                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2756                 return -1;
2757             }
2758             if(s->data_partitioning){
2759                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2760                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2761                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2762                     return -1;
2763                 }
2764             }
2765
2766             s->mb_x = mb_x;
2767             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2768             ff_update_block_index(s);
2769
2770             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2771                 ff_h261_reorder_mb_index(s);
2772                 xy= s->mb_y*s->mb_stride + s->mb_x;
2773                 mb_type= s->mb_type[xy];
2774             }
2775
2776             /* write gob / video packet header  */
2777             if(s->rtp_mode){
2778                 int current_packet_size, is_gob_start;
2779
2780                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2781
2782                 is_gob_start = s->rtp_payload_size &&
2783                                current_packet_size >= s->rtp_payload_size &&
2784                                mb_y + mb_x > 0;
2785
2786                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2787
2788                 switch(s->codec_id){
2789                 case AV_CODEC_ID_H263:
2790                 case AV_CODEC_ID_H263P:
2791                     if(!s->h263_slice_structured)
2792                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2793                     break;
2794                 case AV_CODEC_ID_MPEG2VIDEO:
2795                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2796                 case AV_CODEC_ID_MPEG1VIDEO:
2797                     if(s->mb_skip_run) is_gob_start=0;
2798                     break;
2799                 }
2800
2801                 if(is_gob_start){
2802                     if(s->start_mb_y != mb_y || mb_x!=0){
2803                         write_slice_end(s);
2804
2805                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2806                             ff_mpeg4_init_partitions(s);
2807                         }
2808                     }
2809
2810                     assert((put_bits_count(&s->pb)&7) == 0);
2811                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2812
2813                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2814                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2815                         int d = 100 / s->error_rate;
2816                         if(r % d == 0){
2817                             current_packet_size=0;
2818                             s->pb.buf_ptr= s->ptr_lastgob;
2819                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2820                         }
2821                     }
2822
2823 #if FF_API_RTP_CALLBACK
2824 FF_DISABLE_DEPRECATION_WARNINGS
2825                     if (s->avctx->rtp_callback){
2826                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2827                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2828                     }
2829 FF_ENABLE_DEPRECATION_WARNINGS
2830 #endif
2831                     update_mb_info(s, 1);
2832
2833                     switch(s->codec_id){
2834                     case AV_CODEC_ID_MPEG4:
2835                         if (CONFIG_MPEG4_ENCODER) {
2836                             ff_mpeg4_encode_video_packet_header(s);
2837                             ff_mpeg4_clean_buffers(s);
2838                         }
2839                     break;
2840                     case AV_CODEC_ID_MPEG1VIDEO:
2841                     case AV_CODEC_ID_MPEG2VIDEO:
2842                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2843                             ff_mpeg1_encode_slice_header(s);
2844                             ff_mpeg1_clean_buffers(s);
2845                         }
2846                     break;
2847                     case AV_CODEC_ID_H263:
2848                     case AV_CODEC_ID_H263P:
2849                         if (CONFIG_H263_ENCODER)
2850                             ff_h263_encode_gob_header(s, mb_y);
2851                     break;
2852                     }
2853
2854                     if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
2855                         int bits= put_bits_count(&s->pb);
2856                         s->misc_bits+= bits - s->last_bits;
2857                         s->last_bits= bits;
2858                     }
2859
2860                     s->ptr_lastgob += current_packet_size;
2861                     s->first_slice_line=1;
2862                     s->resync_mb_x=mb_x;
2863                     s->resync_mb_y=mb_y;
2864                 }
2865             }
2866
2867             if(  (s->resync_mb_x   == s->mb_x)
2868                && s->resync_mb_y+1 == s->mb_y){
2869                 s->first_slice_line=0;
2870             }
2871
2872             s->mb_skipped=0;
2873             s->dquant=0; //only for QP_RD
2874
2875             update_mb_info(s, 0);
2876
2877             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2878                 int next_block=0;
2879                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2880
2881                 copy_context_before_encode(&backup_s, s, -1);
2882                 backup_s.pb= s->pb;
2883                 best_s.data_partitioning= s->data_partitioning;
2884                 best_s.partitioned_frame= s->partitioned_frame;
2885                 if(s->data_partitioning){
2886                     backup_s.pb2= s->pb2;
2887                     backup_s.tex_pb= s->tex_pb;
2888                 }
2889
2890                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2891                     s->mv_dir = MV_DIR_FORWARD;
2892                     s->mv_type = MV_TYPE_16X16;
2893                     s->mb_intra= 0;
2894                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2895                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2896                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2897                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2898                 }
2899                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2900                     s->mv_dir = MV_DIR_FORWARD;
2901                     s->mv_type = MV_TYPE_FIELD;
2902                     s->mb_intra= 0;
2903                     for(i=0; i<2; i++){
2904                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2905                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2906                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2907                     }
2908                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2909                                  &dmin, &next_block, 0, 0);
2910                 }
2911                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2912                     s->mv_dir = MV_DIR_FORWARD;
2913                     s->mv_type = MV_TYPE_16X16;
2914                     s->mb_intra= 0;
2915                     s->mv[0][0][0] = 0;
2916                     s->mv[0][0][1] = 0;
2917                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2918                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2919                 }
2920                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2921                     s->mv_dir = MV_DIR_FORWARD;
2922                     s->mv_type = MV_TYPE_8X8;
2923                     s->mb_intra= 0;
2924                     for(i=0; i<4; i++){
2925                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2926                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2927                     }
2928                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2929                                  &dmin, &next_block, 0, 0);
2930                 }
2931                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2932                     s->mv_dir = MV_DIR_FORWARD;
2933                     s->mv_type = MV_TYPE_16X16;
2934                     s->mb_intra= 0;
2935                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2936                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2937                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2938                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2939                 }
2940                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2941                     s->mv_dir = MV_DIR_BACKWARD;
2942                     s->mv_type = MV_TYPE_16X16;
2943                     s->mb_intra= 0;
2944                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2945                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2946                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2947                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2948                 }
2949                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2950                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2951                     s->mv_type = MV_TYPE_16X16;
2952                     s->mb_intra= 0;
2953                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2954                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2955                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2956                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2957                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2958                                  &dmin, &next_block, 0, 0);
2959                 }
2960                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2961                     s->mv_dir = MV_DIR_FORWARD;
2962                     s->mv_type = MV_TYPE_FIELD;
2963                     s->mb_intra= 0;
2964                     for(i=0; i<2; i++){
2965                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2966                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2967                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2968                     }
2969                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2970                                  &dmin, &next_block, 0, 0);
2971                 }
2972                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2973                     s->mv_dir = MV_DIR_BACKWARD;
2974                     s->mv_type = MV_TYPE_FIELD;
2975                     s->mb_intra= 0;
2976                     for(i=0; i<2; i++){
2977                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2978                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2979                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2980                     }
2981                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2982                                  &dmin, &next_block, 0, 0);
2983                 }
2984                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2985                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2986                     s->mv_type = MV_TYPE_FIELD;
2987                     s->mb_intra= 0;
2988                     for(dir=0; dir<2; dir++){
2989                         for(i=0; i<2; i++){
2990                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2991                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2992                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2993                         }
2994                     }
2995                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2996                                  &dmin, &next_block, 0, 0);
2997                 }
2998                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2999                     s->mv_dir = 0;
3000                     s->mv_type = MV_TYPE_16X16;
3001                     s->mb_intra= 1;
3002                     s->mv[0][0][0] = 0;
3003                     s->mv[0][0][1] = 0;
3004                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3005                                  &dmin, &next_block, 0, 0);
3006                     if(s->h263_pred || s->h263_aic){
3007                         if(best_s.mb_intra)
3008                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3009                         else
3010                             ff_clean_intra_table_entries(s); //old mode?
3011                     }
3012                 }
3013
3014                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3015                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3016                         const int last_qp= backup_s.qscale;
3017                         int qpi, qp, dc[6];
3018                         int16_t ac[6][16];
3019                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3020                         static const int dquant_tab[4]={-1,1,-2,2};
3021
3022                         assert(backup_s.dquant == 0);
3023
3024                         //FIXME intra
3025                         s->mv_dir= best_s.mv_dir;
3026                         s->mv_type = MV_TYPE_16X16;
3027                         s->mb_intra= best_s.mb_intra;
3028                         s->mv[0][0][0] = best_s.mv[0][0][0];
3029                         s->mv[0][0][1] = best_s.mv[0][0][1];
3030                         s->mv[1][0][0] = best_s.mv[1][0][0];
3031                         s->mv[1][0][1] = best_s.mv[1][0][1];
3032
3033                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3034                         for(; qpi<4; qpi++){
3035                             int dquant= dquant_tab[qpi];
3036                             qp= last_qp + dquant;
3037                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3038                                 continue;
3039                             backup_s.dquant= dquant;
3040                             if(s->mb_intra && s->dc_val[0]){
3041                                 for(i=0; i<6; i++){
3042                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3043                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3044                                 }
3045                             }
3046
3047                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3048                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3049                             if(best_s.qscale != qp){
3050                                 if(s->mb_intra && s->dc_val[0]){
3051                                     for(i=0; i<6; i++){
3052                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3053                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3054                                     }
3055                                 }
3056                             }
3057                         }
3058                     }
3059                 }
3060                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3061                     int mx= s->b_direct_mv_table[xy][0];
3062                     int my= s->b_direct_mv_table[xy][1];
3063
3064                     backup_s.dquant = 0;
3065                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3066                     s->mb_intra= 0;
3067                     ff_mpeg4_set_direct_mv(s, mx, my);
3068                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3069                                  &dmin, &next_block, mx, my);
3070                 }
3071                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3072                     backup_s.dquant = 0;
3073                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3074                     s->mb_intra= 0;
3075                     ff_mpeg4_set_direct_mv(s, 0, 0);
3076                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3077                                  &dmin, &next_block, 0, 0);
3078                 }
3079                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3080                     int coded=0;
3081                     for(i=0; i<6; i++)
3082                         coded |= s->block_last_index[i];
3083                     if(coded){
3084                         int mx,my;
3085                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3086                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3087                             mx=my=0; //FIXME find the one we actually used
3088                             ff_mpeg4_set_direct_mv(s, mx, my);
3089                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3090                             mx= s->mv[1][0][0];
3091                             my= s->mv[1][0][1];
3092                         }else{
3093                             mx= s->mv[0][0][0];
3094                             my= s->mv[0][0][1];
3095                         }
3096
3097                         s->mv_dir= best_s.mv_dir;
3098                         s->mv_type = best_s.mv_type;
3099                         s->mb_intra= 0;
3100 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3101                         s->mv[0][0][1] = best_s.mv[0][0][1];
3102                         s->mv[1][0][0] = best_s.mv[1][0][0];
3103                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3104                         backup_s.dquant= 0;
3105                         s->skipdct=1;
3106                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3107                                         &dmin, &next_block, mx, my);
3108                         s->skipdct=0;
3109                     }
3110                 }
3111
3112                 s->current_picture.qscale_table[xy] = best_s.qscale;
3113
3114                 copy_context_after_encode(s, &best_s, -1);
3115
3116                 pb_bits_count= put_bits_count(&s->pb);
3117                 flush_put_bits(&s->pb);
3118                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3119                 s->pb= backup_s.pb;
3120
3121                 if(s->data_partitioning){
3122                     pb2_bits_count= put_bits_count(&s->pb2);
3123                     flush_put_bits(&s->pb2);
3124                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3125                     s->pb2= backup_s.pb2;
3126
3127                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3128                     flush_put_bits(&s->tex_pb);
3129                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3130                     s->tex_pb= backup_s.tex_pb;
3131                 }
3132                 s->last_bits= put_bits_count(&s->pb);
3133
3134                 if (CONFIG_H263_ENCODER &&
3135                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3136                     ff_h263_update_motion_val(s);
3137
3138                 if(next_block==0){ //FIXME 16 vs linesize16
3139                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3140                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3141                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3142                 }
3143
3144                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3145                     ff_mpv_decode_mb(s, s->block);
3146             } else {
3147                 int motion_x = 0, motion_y = 0;
3148                 s->mv_type=MV_TYPE_16X16;
3149                 // only one MB-Type possible
3150
3151                 switch(mb_type){
3152                 case CANDIDATE_MB_TYPE_INTRA:
3153                     s->mv_dir = 0;
3154                     s->mb_intra= 1;
3155                     motion_x= s->mv[0][0][0] = 0;
3156                     motion_y= s->mv[0][0][1] = 0;
3157                     break;
3158                 case CANDIDATE_MB_TYPE_INTER:
3159                     s->mv_dir = MV_DIR_FORWARD;
3160                     s->mb_intra= 0;
3161                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3162                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3163                     break;
3164                 case CANDIDATE_MB_TYPE_INTER_I:
3165                     s->mv_dir = MV_DIR_FORWARD;
3166                     s->mv_type = MV_TYPE_FIELD;
3167                     s->mb_intra= 0;
3168                     for(i=0; i<2; i++){
3169                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3170                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3171                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3172                     }
3173                     break;
3174                 case CANDIDATE_MB_TYPE_INTER4V:
3175                     s->mv_dir = MV_DIR_FORWARD;
3176                     s->mv_type = MV_TYPE_8X8;
3177                     s->mb_intra= 0;
3178                     for(i=0; i<4; i++){
3179                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3180                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3181                     }
3182                     break;
3183                 case CANDIDATE_MB_TYPE_DIRECT:
3184                     if (CONFIG_MPEG4_ENCODER) {
3185                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3186                         s->mb_intra= 0;
3187                         motion_x=s->b_direct_mv_table[xy][0];
3188                         motion_y=s->b_direct_mv_table[xy][1];
3189                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3190                     }
3191                     break;
3192                 case CANDIDATE_MB_TYPE_DIRECT0:
3193                     if (CONFIG_MPEG4_ENCODER) {
3194                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3195                         s->mb_intra= 0;
3196                         ff_mpeg4_set_direct_mv(s, 0, 0);
3197                     }
3198                     break;
3199                 case CANDIDATE_MB_TYPE_BIDIR:
3200                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3201                     s->mb_intra= 0;
3202                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3203                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3204                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3205                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3206                     break;
3207                 case CANDIDATE_MB_TYPE_BACKWARD:
3208                     s->mv_dir = MV_DIR_BACKWARD;
3209                     s->mb_intra= 0;
3210                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3211                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3212                     break;
3213                 case CANDIDATE_MB_TYPE_FORWARD:
3214                     s->mv_dir = MV_DIR_FORWARD;
3215                     s->mb_intra= 0;
3216                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3217                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3218                     break;
3219                 case CANDIDATE_MB_TYPE_FORWARD_I:
3220                     s->mv_dir = MV_DIR_FORWARD;
3221                     s->mv_type = MV_TYPE_FIELD;
3222                     s->mb_intra= 0;
3223                     for(i=0; i<2; i++){
3224                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3225                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3226                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3227                     }
3228                     break;
3229                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3230                     s->mv_dir = MV_DIR_BACKWARD;
3231                     s->mv_type = MV_TYPE_FIELD;
3232                     s->mb_intra= 0;
3233                     for(i=0; i<2; i++){
3234                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3235                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3236                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3237                     }
3238                     break;
3239                 case CANDIDATE_MB_TYPE_BIDIR_I:
3240                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3241                     s->mv_type = MV_TYPE_FIELD;
3242                     s->mb_intra= 0;
3243                     for(dir=0; dir<2; dir++){
3244                         for(i=0; i<2; i++){
3245                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3246                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3247                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3248                         }
3249                     }
3250                     break;
3251                 default:
3252                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3253                 }
3254
3255                 encode_mb(s, motion_x, motion_y);
3256
3257                 // RAL: Update last macroblock type
3258                 s->last_mv_dir = s->mv_dir;
3259
3260                 if (CONFIG_H263_ENCODER &&
3261                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3262                     ff_h263_update_motion_val(s);
3263
3264                 ff_mpv_decode_mb(s, s->block);
3265             }
3266
3267             /* clean the MV table in IPS frames for direct mode in B-frames */
3268             if(s->mb_intra /* && I,P,S_TYPE */){
3269                 s->p_mv_table[xy][0]=0;
3270                 s->p_mv_table[xy][1]=0;
3271             }
3272
3273             if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3274                 int w= 16;
3275                 int h= 16;
3276
3277                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3278                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3279
3280                 s->current_picture.encoding_error[0] += sse(
3281                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3282                     s->dest[0], w, h, s->linesize);
3283                 s->current_picture.encoding_error[1] += sse(
3284                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3285                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3286                 s->current_picture.encoding_error[2] += sse(
3287                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3288                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3289             }
3290             if(s->loop_filter){
3291                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3292                     ff_h263_loop_filter(s);
3293             }
3294             ff_dlog(s->avctx, "MB %d %d bits\n",
3295                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3296         }
3297     }
3298
3299     //not beautiful here but we must write it before flushing so it has to be here
3300     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3301         ff_msmpeg4_encode_ext_header(s);
3302
3303     write_slice_end(s);
3304
3305 #if FF_API_RTP_CALLBACK
3306 FF_DISABLE_DEPRECATION_WARNINGS
3307     /* Send the last GOB if RTP */
3308     if (s->avctx->rtp_callback) {
3309         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3310         int pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3311         /* Call the RTP callback to send the last GOB */
3312         emms_c();
3313         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3314     }
3315 FF_ENABLE_DEPRECATION_WARNINGS
3316 #endif
3317
3318     return 0;
3319 }
3320
/*
 * Add src->field to dst->field and reset src->field to zero.
 * Relies on pointers named dst and src being visible at the point of use.
 * The do { } while (0) wrapper makes the expansion a single statement, so
 * the macro stays correct inside unbraced if/else bodies.
 */
#define MERGE(field)                 \
    do {                             \
        dst->field += src->field;    \
        src->field  = 0;             \
    } while (0)
3322 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3323     MERGE(me.scene_change_score);
3324     MERGE(me.mc_mb_var_sum_temp);
3325     MERGE(me.mb_var_sum_temp);
3326 }
3327
3328 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3329     int i;
3330
3331     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3332     MERGE(dct_count[1]);
3333     MERGE(mv_bits);
3334     MERGE(i_tex_bits);
3335     MERGE(p_tex_bits);
3336     MERGE(i_count);
3337     MERGE(f_count);
3338     MERGE(b_count);
3339     MERGE(skip_count);
3340     MERGE(misc_bits);
3341     MERGE(er.error_count);
3342     MERGE(padding_bug_score);
3343     MERGE(current_picture.encoding_error[0]);
3344     MERGE(current_picture.encoding_error[1]);
3345     MERGE(current_picture.encoding_error[2]);
3346
3347     if (dst->noise_reduction){
3348         for(i=0; i<64; i++){
3349             MERGE(dct_error_sum[0][i]);
3350             MERGE(dct_error_sum[1][i]);
3351         }
3352     }
3353
3354     assert(put_bits_count(&src->pb) % 8 ==0);
3355     assert(put_bits_count(&dst->pb) % 8 ==0);
3356     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3357     flush_put_bits(&dst->pb);
3358 }
3359
3360 static int estimate_qp(MpegEncContext *s, int dry_run){
3361     if (s->next_lambda){
3362         s->current_picture_ptr->f->quality =
3363         s->current_picture.f->quality = s->next_lambda;
3364         if(!dry_run) s->next_lambda= 0;
3365     } else if (!s->fixed_qscale) {
3366         s->current_picture_ptr->f->quality =
3367         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3368         if (s->current_picture.f->quality < 0)
3369             return -1;
3370     }
3371
3372     if(s->adaptive_quant){
3373         switch(s->codec_id){
3374         case AV_CODEC_ID_MPEG4:
3375             if (CONFIG_MPEG4_ENCODER)
3376                 ff_clean_mpeg4_qscales(s);
3377             break;
3378         case AV_CODEC_ID_H263:
3379         case AV_CODEC_ID_H263P:
3380         case AV_CODEC_ID_FLV1:
3381             if (CONFIG_H263_ENCODER)
3382                 ff_clean_h263_qscales(s);
3383             break;
3384         default:
3385             ff_init_qscale_tab(s);
3386         }
3387
3388         s->lambda= s->lambda_table[0];
3389         //FIXME broken
3390     }else
3391         s->lambda = s->current_picture.f->quality;
3392     update_qscale(s);
3393     return 0;
3394 }
3395
3396 /* must be called before writing the header */
3397 static void set_frame_distances(MpegEncContext * s){
3398     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3399     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3400
3401     if(s->pict_type==AV_PICTURE_TYPE_B){
3402         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3403         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3404     }else{
3405         s->pp_time= s->time - s->last_non_b_time;
3406         s->last_non_b_time= s->time;
3407         assert(s->picture_number==0 || s->pp_time > 0);
3408     }
3409 }
3410
3411 static int encode_picture(MpegEncContext *s, int picture_number)
3412 {
3413     int i, ret;
3414     int bits;
3415     int context_count = s->slice_context_count;
3416
3417     s->picture_number = picture_number;
3418
3419     /* Reset the average MB variance */
3420     s->me.mb_var_sum_temp    =
3421     s->me.mc_mb_var_sum_temp = 0;
3422
3423     /* we need to initialize some time vars before we can encode B-frames */
3424     // RAL: Condition added for MPEG1VIDEO
3425     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3426         set_frame_distances(s);
3427     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3428         ff_set_mpeg4_time(s);
3429
3430     s->me.scene_change_score=0;
3431
3432 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3433
3434     if(s->pict_type==AV_PICTURE_TYPE_I){
3435         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3436         else                        s->no_rounding=0;
3437     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3438         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3439             s->no_rounding ^= 1;
3440     }
3441
3442     if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3443         if (estimate_qp(s,1) < 0)
3444             return -1;
3445         ff_get_2pass_fcode(s);
3446     } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3447         if(s->pict_type==AV_PICTURE_TYPE_B)
3448             s->lambda= s->last_lambda_for[s->pict_type];
3449         else
3450             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3451         update_qscale(s);
3452     }
3453
3454     s->mb_intra=0; //for the rate distortion & bit compare functions
3455     for(i=1; i<context_count; i++){
3456         ret = ff_update_duplicate_context(s->thread_context[i], s);
3457         if (ret < 0)
3458             return ret;
3459     }
3460
3461     if(ff_init_me(s)<0)
3462         return -1;
3463
3464     /* Estimate motion for every MB */
3465     if(s->pict_type != AV_PICTURE_TYPE_I){
3466         s->lambda  = (s->lambda  * s->me_penalty_compensation + 128) >> 8;
3467         s->lambda2 = (s->lambda2 * (int64_t) s->me_penalty_compensation + 128) >> 8;
3468         if (s->pict_type != AV_PICTURE_TYPE_B) {
3469             if ((s->me_pre && s->last_non_b_pict_type == AV_PICTURE_TYPE_I) ||
3470                 s->me_pre == 2) {
3471                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3472             }
3473         }
3474
3475         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3476     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3477         /* I-Frame */
3478         for(i=0; i<s->mb_stride*s->mb_height; i++)
3479             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3480
3481         if(!s->fixed_qscale){
3482             /* finding spatial complexity for I-frame rate control */
3483             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3484         }
3485     }
3486     for(i=1; i<context_count; i++){
3487         merge_context_after_me(s, s->thread_context[i]);
3488     }
3489     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3490     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3491     emms_c();
3492
3493     if (s->me.scene_change_score > s->scenechange_threshold &&
3494         s->pict_type == AV_PICTURE_TYPE_P) {
3495         s->pict_type= AV_PICTURE_TYPE_I;
3496         for(i=0; i<s->mb_stride*s->mb_height; i++)
3497             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3498         ff_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3499                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3500     }
3501
3502     if(!s->umvplus){
3503         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3504             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3505
3506             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3507                 int a,b;
3508                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3509                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3510                 s->f_code= FFMAX3(s->f_code, a, b);
3511             }
3512
3513             ff_fix_long_p_mvs(s);
3514             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3515             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3516                 int j;
3517                 for(i=0; i<2; i++){
3518                     for(j=0; j<2; j++)
3519                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3520                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3521                 }
3522             }
3523         }
3524
3525         if(s->pict_type==AV_PICTURE_TYPE_B){
3526             int a, b;
3527
3528             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3529             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3530             s->f_code = FFMAX(a, b);
3531
3532             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3533             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3534             s->b_code = FFMAX(a, b);
3535
3536             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3537             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3538             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3539             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3540             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3541                 int dir, j;
3542                 for(dir=0; dir<2; dir++){
3543                     for(i=0; i<2; i++){
3544                         for(j=0; j<2; j++){
3545                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3546                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3547                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3548                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3549                         }
3550                     }
3551                 }
3552             }
3553         }
3554     }
3555
3556     if (estimate_qp(s, 0) < 0)
3557         return -1;
3558
3559     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3560         s->pict_type == AV_PICTURE_TYPE_I &&
3561         !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3562         s->qscale= 3; //reduce clipping problems
3563
3564     if (s->out_format == FMT_MJPEG) {
3565         /* for mjpeg, we do include qscale in the matrix */
3566         for(i=1;i<64;i++){
3567             int j = s->idsp.idct_permutation[i];
3568
3569             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3570         }
3571         s->y_dc_scale_table=
3572         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3573         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3574         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3575                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3576         s->qscale= 8;
3577     }
3578
3579     //FIXME var duplication
3580     s->current_picture_ptr->f->key_frame =
3581     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3582     s->current_picture_ptr->f->pict_type =
3583     s->current_picture.f->pict_type = s->pict_type;
3584
3585     if (s->current_picture.f->key_frame)
3586         s->picture_in_gop_number=0;
3587
3588     s->last_bits= put_bits_count(&s->pb);
3589     switch(s->out_format) {
3590     case FMT_MJPEG:
3591         if (CONFIG_MJPEG_ENCODER)
3592             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3593                                            s->pred, s->intra_matrix);
3594         break;
3595     case FMT_H261:
3596         if (CONFIG_H261_ENCODER)
3597             ff_h261_encode_picture_header(s, picture_number);
3598         break;
3599     case FMT_H263:
3600         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3601             ff_wmv2_encode_picture_header(s, picture_number);
3602         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3603             ff_msmpeg4_encode_picture_header(s, picture_number);
3604         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3605             ff_mpeg4_encode_picture_header(s, picture_number);
3606         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3607             ret = ff_rv10_encode_picture_header(s, picture_number);
3608             if (ret < 0)
3609                 return ret;
3610         }
3611         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3612             ff_rv20_encode_picture_header(s, picture_number);
3613         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3614             ff_flv_encode_picture_header(s, picture_number);
3615         else if (CONFIG_H263_ENCODER)
3616             ff_h263_encode_picture_header(s, picture_number);
3617         break;
3618     case FMT_MPEG1:
3619         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3620             ff_mpeg1_encode_picture_header(s, picture_number);
3621         break;
3622     default:
3623         assert(0);
3624     }
3625     bits= put_bits_count(&s->pb);
3626     s->header_bits= bits - s->last_bits;
3627
3628     for(i=1; i<context_count; i++){
3629         update_duplicate_context_after_me(s->thread_context[i], s);
3630     }
3631     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3632     for(i=1; i<context_count; i++){
3633         merge_context_after_encode(s, s->thread_context[i]);
3634     }
3635     emms_c();
3636     return 0;
3637 }
3638
3639 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3640     const int intra= s->mb_intra;
3641     int i;
3642
3643     s->dct_count[intra]++;
3644
3645     for(i=0; i<64; i++){
3646         int level= block[i];
3647
3648         if(level){
3649             if(level>0){
3650                 s->dct_error_sum[intra][i] += level;
3651                 level -= s->dct_offset[intra][i];
3652                 if(level<0) level=0;
3653             }else{
3654                 s->dct_error_sum[intra][i] -= level;
3655                 level += s->dct_offset[intra][i];
3656                 if(level>0) level=0;
3657             }
3658             block[i]= level;
3659         }
3660     }
3661 }
3662
3663 static int dct_quantize_trellis_c(MpegEncContext *s,
3664                                   int16_t *block, int n,
3665                                   int qscale, int *overflow){
3666     const int *qmat;
3667     const uint8_t *scantable= s->intra_scantable.scantable;
3668     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3669     int max=0;
3670     unsigned int threshold1, threshold2;
3671     int bias=0;
3672     int run_tab[65];
3673     int level_tab[65];
3674     int score_tab[65];
3675     int survivor[65];
3676     int survivor_count;
3677     int last_run=0;
3678     int last_level=0;
3679     int last_score= 0;
3680     int last_i;
3681     int coeff[2][64];
3682     int coeff_count[64];
3683     int qmul, qadd, start_i, last_non_zero, i, dc;
3684     const int esc_length= s->ac_esc_length;
3685     uint8_t * length;
3686     uint8_t * last_length;
3687     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3688
3689     s->fdsp.fdct(block);
3690
3691     if(s->dct_error_sum)
3692         s->denoise_dct(s, block);
3693     qmul= qscale*16;
3694     qadd= ((qscale-1)|1)*8;
3695
3696     if (s->mb_intra) {
3697         int q;
3698         if (!s->h263_aic) {
3699             if (n < 4)
3700                 q = s->y_dc_scale;
3701             else
3702                 q = s->c_dc_scale;
3703             q = q << 3;
3704         } else{
3705             /* For AIC we skip quant/dequant of INTRADC */
3706             q = 1 << 3;
3707             qadd=0;
3708         }
3709
3710         /* note: block[0] is assumed to be positive */
3711         block[0] = (block[0] + (q >> 1)) / q;
3712         start_i = 1;
3713         last_non_zero = 0;
3714         qmat = s->q_intra_matrix[qscale];
3715         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3716             bias= 1<<(QMAT_SHIFT-1);
3717         length     = s->intra_ac_vlc_length;
3718         last_length= s->intra_ac_vlc_last_length;
3719     } else {
3720         start_i = 0;
3721         last_non_zero = -1;
3722         qmat = s->q_inter_matrix[qscale];
3723         length     = s->inter_ac_vlc_length;
3724         last_length= s->inter_ac_vlc_last_length;
3725     }
3726     last_i= start_i;
3727
3728     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3729     threshold2= (threshold1<<1);
3730
3731     for(i=63; i>=start_i; i--) {
3732         const int j = scantable[i];
3733         int level = block[j] * qmat[j];
3734
3735         if(((unsigned)(level+threshold1))>threshold2){
3736             last_non_zero = i;
3737             break;
3738         }
3739     }
3740
3741     for(i=start_i; i<=last_non_zero; i++) {
3742         const int j = scantable[i];
3743         int level = block[j] * qmat[j];
3744
3745 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3746 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3747         if(((unsigned)(level+threshold1))>threshold2){
3748             if(level>0){
3749                 level= (bias + level)>>QMAT_SHIFT;
3750                 coeff[0][i]= level;
3751                 coeff[1][i]= level-1;
3752 //                coeff[2][k]= level-2;
3753             }else{
3754                 level= (bias - level)>>QMAT_SHIFT;
3755                 coeff[0][i]= -level;
3756                 coeff[1][i]= -level+1;
3757 //                coeff[2][k]= -level+2;
3758             }
3759             coeff_count[i]= FFMIN(level, 2);
3760             assert(coeff_count[i]);
3761             max |=level;
3762         }else{
3763             coeff[0][i]= (level>>31)|1;
3764             coeff_count[i]= 1;
3765         }
3766     }
3767
3768     *overflow= s->max_qcoeff < max; //overflow might have happened
3769
3770     if(last_non_zero < start_i){
3771         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3772         return last_non_zero;
3773     }
3774
3775     score_tab[start_i]= 0;
3776     survivor[0]= start_i;
3777     survivor_count= 1;
3778
3779     for(i=start_i; i<=last_non_zero; i++){
3780         int level_index, j, zero_distortion;
3781         int dct_coeff= FFABS(block[ scantable[i] ]);
3782         int best_score=256*256*256*120;
3783
3784         if (s->fdsp.fdct == ff_fdct_ifast)
3785             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3786         zero_distortion= dct_coeff*dct_coeff;
3787
3788         for(level_index=0; level_index < coeff_count[i]; level_index++){
3789             int distortion;
3790             int level= coeff[level_index][i];
3791             const int alevel= FFABS(level);
3792             int unquant_coeff;
3793
3794             assert(level);
3795
3796             if(s->out_format == FMT_H263){
3797                 unquant_coeff= alevel*qmul + qadd;
3798             } else { // MPEG-1
3799                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3800                 if(s->mb_intra){
3801                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3802                         unquant_coeff =   (unquant_coeff - 1) | 1;
3803                 }else{
3804                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3805                         unquant_coeff =   (unquant_coeff - 1) | 1;
3806                 }
3807                 unquant_coeff<<= 3;
3808             }
3809
3810             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3811             level+=64;
3812             if((level&(~127)) == 0){
3813                 for(j=survivor_count-1; j>=0; j--){
3814                     int run= i - survivor[j];
3815                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3816                     score += score_tab[i-run];
3817
3818                     if(score < best_score){
3819                         best_score= score;
3820                         run_tab[i+1]= run;
3821                         level_tab[i+1]= level-64;
3822                     }
3823                 }
3824
3825                 if(s->out_format == FMT_H263){
3826                     for(j=survivor_count-1; j>=0; j--){
3827                         int run= i - survivor[j];
3828                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3829                         score += score_tab[i-run];
3830                         if(score < last_score){
3831                             last_score= score;
3832                             last_run= run;
3833                             last_level= level-64;
3834                             last_i= i+1;
3835                         }
3836                     }
3837                 }
3838             }else{
3839                 distortion += esc_length*lambda;
3840                 for(j=survivor_count-1; j>=0; j--){
3841                     int run= i - survivor[j];
3842                     int score= distortion + score_tab[i-run];
3843
3844                     if(score < best_score){
3845                         best_score= score;
3846                         run_tab[i+1]= run;
3847                         level_tab[i+1]= level-64;
3848                     }
3849                 }
3850
3851                 if(s->out_format == FMT_H263){
3852                   for(j=survivor_count-1; j>=0; j--){
3853                         int run= i - survivor[j];
3854                         int score= distortion + score_tab[i-run];
3855                         if(score < last_score){
3856                             last_score= score;
3857                             last_run= run;
3858                             last_level= level-64;
3859                             last_i= i+1;
3860                         }
3861                     }
3862                 }
3863             }
3864         }
3865
3866         score_tab[i+1]= best_score;
3867
3868         // Note: there is a vlc code in MPEG-4 which is 1 bit shorter then another one with a shorter run and the same level
3869         if(last_non_zero <= 27){
3870             for(; survivor_count; survivor_count--){
3871                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3872                     break;
3873             }
3874         }else{
3875             for(; survivor_count; survivor_count--){
3876                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3877                     break;
3878             }
3879         }
3880
3881         survivor[ survivor_count++ ]= i+1;
3882     }
3883
3884     if(s->out_format != FMT_H263){
3885         last_score= 256*256*256*120;
3886         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3887             int score= score_tab[i];
3888             if (i)
3889                 score += lambda * 2; // FIXME more exact?
3890
3891             if(score < last_score){
3892                 last_score= score;
3893                 last_i= i;
3894                 last_level= level_tab[i];
3895                 last_run= run_tab[i];
3896             }
3897         }
3898     }
3899
3900     s->coded_score[n] = last_score;
3901
3902     dc= FFABS(block[0]);
3903     last_non_zero= last_i - 1;
3904     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3905
3906     if(last_non_zero < start_i)
3907         return last_non_zero;
3908
3909     if(last_non_zero == 0 && start_i == 0){
3910         int best_level= 0;
3911         int best_score= dc * dc;
3912
3913         for(i=0; i<coeff_count[0]; i++){
3914             int level= coeff[i][0];
3915             int alevel= FFABS(level);
3916             int unquant_coeff, score, distortion;
3917
3918             if(s->out_format == FMT_H263){
3919                     unquant_coeff= (alevel*qmul + qadd)>>3;
3920             } else { // MPEG-1
3921                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3922                     unquant_coeff =   (unquant_coeff - 1) | 1;
3923             }
3924             unquant_coeff = (unquant_coeff + 4) >> 3;
3925             unquant_coeff<<= 3 + 3;
3926
3927             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3928             level+=64;
3929             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3930             else                    score= distortion + esc_length*lambda;
3931
3932             if(score < best_score){
3933                 best_score= score;
3934                 best_level= level - 64;
3935             }
3936         }
3937         block[0]= best_level;
3938         s->coded_score[n] = best_score - dc*dc;
3939         if(best_level == 0) return -1;
3940         else                return last_non_zero;
3941     }
3942
3943     i= last_i;
3944     assert(last_level);
3945
3946     block[ perm_scantable[last_non_zero] ]= last_level;
3947     i -= last_run + 1;
3948
3949     for(; i>start_i; i -= run_tab[i] + 1){
3950         block[ perm_scantable[i-1] ]= level_tab[i];
3951     }
3952
3953     return last_non_zero;
3954 }
3955
3956 //#define REFINE_STATS 1
3957 static int16_t basis[64][64];
3958
3959 static void build_basis(uint8_t *perm){
3960     int i, j, x, y;
3961     emms_c();
3962     for(i=0; i<8; i++){
3963         for(j=0; j<8; j++){
3964             for(y=0; y<8; y++){
3965                 for(x=0; x<8; x++){
3966                     double s= 0.25*(1<<BASIS_SHIFT);
3967                     int index= 8*i + j;
3968                     int perm_index= perm[index];
3969                     if(i==0) s*= sqrt(0.5);
3970                     if(j==0) s*= sqrt(0.5);
3971                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3972                 }
3973             }
3974         }
3975     }
3976 }
3977
3978 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3979                         int16_t *block, int16_t *weight, int16_t *orig,
3980                         int n, int qscale){
3981     int16_t rem[64];
3982     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3983     const uint8_t *scantable= s->intra_scantable.scantable;
3984     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3985 //    unsigned int threshold1, threshold2;
3986 //    int bias=0;
3987     int run_tab[65];
3988     int prev_run=0;
3989     int prev_level=0;
3990     int qmul, qadd, start_i, last_non_zero, i, dc;
3991     uint8_t * length;
3992     uint8_t * last_length;
3993     int lambda;
3994     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3995 #ifdef REFINE_STATS
3996 static int count=0;
3997 static int after_last=0;
3998 static int to_zero=0;
3999 static int from_zero=0;
4000 static int raise=0;
4001 static int lower=0;
4002 static int messed_sign=0;
4003 #endif
4004
4005     if(basis[0][0] == 0)
4006         build_basis(s->idsp.idct_permutation);
4007
4008     qmul= qscale*2;
4009     qadd= (qscale-1)|1;
4010     if (s->mb_intra) {
4011         if (!s->h263_aic) {
4012             if (n < 4)
4013                 q = s->y_dc_scale;
4014             else
4015                 q = s->c_dc_scale;
4016         } else{
4017             /* For AIC we skip quant/dequant of INTRADC */
4018             q = 1;
4019             qadd=0;
4020         }
4021         q <<= RECON_SHIFT-3;
4022         /* note: block[0] is assumed to be positive */
4023         dc= block[0]*q;
4024 //        block[0] = (block[0] + (q >> 1)) / q;
4025         start_i = 1;
4026 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4027 //            bias= 1<<(QMAT_SHIFT-1);
4028         length     = s->intra_ac_vlc_length;
4029         last_length= s->intra_ac_vlc_last_length;
4030     } else {
4031         dc= 0;
4032         start_i = 0;
4033         length     = s->inter_ac_vlc_length;
4034         last_length= s->inter_ac_vlc_last_length;
4035     }
4036     last_non_zero = s->block_last_index[n];
4037
4038 #ifdef REFINE_STATS
4039 {START_TIMER
4040 #endif
4041     dc += (1<<(RECON_SHIFT-1));
4042     for(i=0; i<64; i++){
4043         rem[i] = dc - (orig[i] << RECON_SHIFT); // FIXME use orig directly instead of copying to rem[]
4044     }
4045 #ifdef REFINE_STATS
4046 STOP_TIMER("memset rem[]")}
4047 #endif
4048     sum=0;
4049     for(i=0; i<64; i++){
4050         int one= 36;
4051         int qns=4;
4052         int w;
4053
4054         w= FFABS(weight[i]) + qns*one;
4055         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4056
4057         weight[i] = w;
4058 //        w=weight[i] = (63*qns + (w/2)) / w;
4059
4060         assert(w>0);
4061         assert(w<(1<<6));
4062         sum += w*w;
4063     }
4064     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4065 #ifdef REFINE_STATS
4066 {START_TIMER
4067 #endif
4068     run=0;
4069     rle_index=0;
4070     for(i=start_i; i<=last_non_zero; i++){
4071         int j= perm_scantable[i];
4072         const int level= block[j];
4073         int coeff;
4074
4075         if(level){
4076             if(level<0) coeff= qmul*level - qadd;
4077             else        coeff= qmul*level + qadd;
4078             run_tab[rle_index++]=run;
4079             run=0;
4080
4081             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4082         }else{
4083             run++;
4084         }
4085     }
4086 #ifdef REFINE_STATS
4087 if(last_non_zero>0){
4088 STOP_TIMER("init rem[]")
4089 }
4090 }
4091
4092 {START_TIMER
4093 #endif
4094     for(;;){
4095         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4096         int best_coeff=0;
4097         int best_change=0;
4098         int run2, best_unquant_change=0, analyze_gradient;
4099 #ifdef REFINE_STATS
4100 {START_TIMER
4101 #endif
4102         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4103
4104         if(analyze_gradient){
4105 #ifdef REFINE_STATS
4106 {START_TIMER
4107 #endif
4108             for(i=0; i<64; i++){
4109                 int w= weight[i];
4110
4111                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4112             }
4113 #ifdef REFINE_STATS
4114 STOP_TIMER("rem*w*w")}
4115 {START_TIMER
4116 #endif
4117             s->fdsp.fdct(d1);
4118 #ifdef REFINE_STATS
4119 STOP_TIMER("dct")}
4120 #endif
4121         }
4122
4123         if(start_i){
4124             const int level= block[0];
4125             int change, old_coeff;
4126
4127             assert(s->mb_intra);
4128
4129             old_coeff= q*level;
4130
4131             for(change=-1; change<=1; change+=2){
4132                 int new_level= level + change;
4133                 int score, new_coeff;
4134
4135                 new_coeff= q*new_level;
4136                 if(new_coeff >= 2048 || new_coeff < 0)
4137                     continue;
4138
4139                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4140                                                   new_coeff - old_coeff);
4141                 if(score<best_score){
4142                     best_score= score;
4143                     best_coeff= 0;
4144                     best_change= change;
4145                     best_unquant_change= new_coeff - old_coeff;
4146                 }
4147             }
4148         }
4149
4150         run=0;
4151         rle_index=0;
4152         run2= run_tab[rle_index++];
4153         prev_level=0;
4154         prev_run=0;
4155
4156         for(i=start_i; i<64; i++){
4157             int j= perm_scantable[i];
4158             const int level= block[j];
4159             int change, old_coeff;
4160
4161             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4162                 break;
4163
4164             if(level){
4165                 if(level<0) old_coeff= qmul*level - qadd;
4166                 else        old_coeff= qmul*level + qadd;
4167                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4168             }else{
4169                 old_coeff=0;
4170                 run2--;
4171                 assert(run2>=0 || i >= last_non_zero );
4172             }
4173
4174             for(change=-1; change<=1; change+=2){
4175                 int new_level= level + change;
4176                 int score, new_coeff, unquant_change;
4177
4178                 score=0;
4179                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4180                    continue;
4181
4182                 if(new_level){
4183                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4184                     else            new_coeff= qmul*new_level + qadd;
4185                     if(new_coeff >= 2048 || new_coeff <= -2048)
4186                         continue;
4187                     //FIXME check for overflow
4188
4189                     if(level){
4190                         if(level < 63 && level > -63){
4191                             if(i < last_non_zero)
4192                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4193                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4194                             else
4195                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4196                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4197                         }
4198                     }else{
4199                         assert(FFABS(new_level)==1);
4200
4201                         if(analyze_gradient){
4202                             int g= d1[ scantable[i] ];
4203                             if(g && (g^new_level) >= 0)
4204                                 continue;
4205                         }
4206
4207                         if(i < last_non_zero){
4208                             int next_i= i + run2 + 1;
4209                             int next_level= block[ perm_scantable[next_i] ] + 64;
4210
4211                             if(next_level&(~127))
4212                                 next_level= 0;
4213
4214                             if(next_i < last_non_zero)
4215                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4216                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4217                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4218                             else
4219                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4220                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4221                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4222                         }else{
4223                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4224                             if(prev_level){
4225                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4226                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4227                             }
4228                         }
4229                     }
4230                 }else{
4231                     new_coeff=0;
4232                     assert(FFABS(level)==1);
4233
4234                     if(i < last_non_zero){
4235                         int next_i= i + run2 + 1;
4236                         int next_level= block[ perm_scantable[next_i] ] + 64;
4237
4238                         if(next_level&(~127))
4239                             next_level= 0;
4240
4241                         if(next_i < last_non_zero)
4242                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4243                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4244                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4245                         else
4246                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4247                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4248                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4249                     }else{
4250                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4251                         if(prev_level){
4252                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4253                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4254                         }
4255                     }
4256                 }
4257
4258                 score *= lambda;
4259
4260                 unquant_change= new_coeff - old_coeff;
4261                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4262
4263                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4264                                                    unquant_change);
4265                 if(score<best_score){
4266                     best_score= score;
4267                     best_coeff= i;
4268                     best_change= change;
4269                     best_unquant_change= unquant_change;
4270                 }
4271             }
4272             if(level){
4273                 prev_level= level + 64;
4274                 if(prev_level&(~127))
4275                     prev_level= 0;
4276                 prev_run= run;
4277                 run=0;
4278             }else{
4279                 run++;
4280             }
4281         }
4282 #ifdef REFINE_STATS
4283 STOP_TIMER("iterative step")}
4284 #endif
4285
4286         if(best_change){
4287             int j= perm_scantable[ best_coeff ];
4288
4289             block[j] += best_change;
4290
4291             if(best_coeff > last_non_zero){
4292                 last_non_zero= best_coeff;
4293                 assert(block[j]);
4294 #ifdef REFINE_STATS
4295 after_last++;
4296 #endif
4297             }else{
4298 #ifdef REFINE_STATS
4299 if(block[j]){
4300     if(block[j] - best_change){
4301         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4302             raise++;
4303         }else{
4304             lower++;
4305         }
4306     }else{
4307         from_zero++;
4308     }
4309 }else{
4310     to_zero++;
4311 }
4312 #endif
4313                 for(; last_non_zero>=start_i; last_non_zero--){
4314                     if(block[perm_scantable[last_non_zero]])
4315                         break;
4316                 }
4317             }
4318 #ifdef REFINE_STATS
4319 count++;
4320 if(256*256*256*64 % count == 0){
4321     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4322 }
4323 #endif
4324             run=0;
4325             rle_index=0;
4326             for(i=start_i; i<=last_non_zero; i++){
4327                 int j= perm_scantable[i];
4328                 const int level= block[j];
4329
4330                  if(level){
4331                      run_tab[rle_index++]=run;
4332                      run=0;
4333                  }else{
4334                      run++;
4335                  }
4336             }
4337
4338             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4339         }else{
4340             break;
4341         }
4342     }
4343 #ifdef REFINE_STATS
4344 if(last_non_zero>0){
4345 STOP_TIMER("iterative search")
4346 }
4347 }
4348 #endif
4349
4350     return last_non_zero;
4351 }
4352
/**
 * Permute the coefficients of an 8x8 block in place.
 *
 * Only the positions scantable[0..last] are touched: each is cleared and
 * its value is stored again at permutation[position].
 *
 * @param block       the block to permute according to the given
 *                    permutation vector
 * @param permutation the permutation vector (read only)
 * @param last        index, in scantable order, of the last non-zero
 *                    coefficient; used to limit the work done
 * @param scantable   the scantable in use; it only limits which positions
 *                    are visited, the block is not (inverse) permuted to
 *                    scantable order
 */
static void block_permute(int16_t *block, const uint8_t *permutation,
                          const uint8_t *scantable, int last)
{
    int i;
    int16_t temp[64];

    /* NOTE(review): last == 0 is skipped as well, which presumes that the
     * first scan position is a fixed point of the permutation -- confirm. */
    if (last <= 0)
        return;
    //FIXME it is ok but not clean and might fail for some permutations
    // if (permutation[1] == 1)
    // return;

    /* Pass 1: save and clear every visited position, so that pass 2 can
     * never read a value it has already overwritten. */
    for (i = 0; i <= last; i++) {
        const int j = scantable[i];
        temp[j]  = block[j];
        block[j] = 0;
    }

    /* Pass 2: write each saved value to its permuted position. */
    for (i = 0; i <= last; i++) {
        const int j = scantable[i];
        block[permutation[j]] = temp[j];
    }
}
4388
4389 int ff_dct_quantize_c(MpegEncContext *s,
4390                         int16_t *block, int n,
4391                         int qscale, int *overflow)
4392 {
4393     int i, j, level, last_non_zero, q, start_i;
4394     const int *qmat;
4395     const uint8_t *scantable= s->intra_scantable.scantable;
4396     int bias;
4397     int max=0;
4398     unsigned int threshold1, threshold2;
4399
4400     s->fdsp.fdct(block);
4401
4402     if(s->dct_error_sum)
4403         s->denoise_dct(s, block);
4404
4405     if (s->mb_intra) {
4406         if (!s->h263_aic) {
4407             if (n < 4)
4408                 q = s->y_dc_scale;
4409             else
4410                 q = s->c_dc_scale;
4411             q = q << 3;
4412         } else
4413             /* For AIC we skip quant/dequant of INTRADC */
4414             q = 1 << 3;
4415
4416         /* note: block[0] is assumed to be positive */
4417         block[0] = (block[0] + (q >> 1)) / q;
4418         start_i = 1;
4419         last_non_zero = 0;
4420         qmat = s->q_intra_matrix[qscale];
4421         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4422     } else {
4423         start_i = 0;
4424         last_non_zero = -1;
4425         qmat = s->q_inter_matrix[qscale];
4426         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4427     }
4428     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4429     threshold2= (threshold1<<1);
4430     for(i=63;i>=start_i;i--) {
4431         j = scantable[i];
4432         level = block[j] * qmat[j];
4433
4434         if(((unsigned)(level+threshold1))>threshold2){
4435             last_non_zero = i;
4436             break;
4437         }else{
4438             block[j]=0;
4439         }
4440     }
4441     for(i=start_i; i<=last_non_zero; i++) {
4442         j = scantable[i];
4443         level = block[j] * qmat[j];
4444
4445 //        if(   bias+level >= (1<<QMAT_SHIFT)
4446 //           || bias-level >= (1<<QMAT_SHIFT)){
4447         if(((unsigned)(level+threshold1))>threshold2){
4448             if(level>0){
4449                 level= (bias + level)>>QMAT_SHIFT;
4450                 block[j]= level;
4451             }else{
4452                 level= (bias - level)>>QMAT_SHIFT;
4453                 block[j]= -level;
4454             }
4455             max |=level;
4456         }else{
4457             block[j]=0;
4458         }
4459     }
4460     *overflow= s->max_qcoeff < max; //overflow might have happened
4461
4462     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4463     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4464         block_permute(block, s->idsp.idct_permutation,
4465                       scantable, last_non_zero);
4466
4467     return last_non_zero;
4468 }
4469
/* Byte offset of field x inside MpegEncContext, for the AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags shared by the tables below. The replacement list is
 * parenthesized so that it stays a single operand in any expression. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4472 static const AVOption h263_options[] = {
4473     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4474     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4475     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4476     FF_MPV_COMMON_OPTS
4477     { NULL },
4478 };
4479
4480 static const AVClass h263_class = {
4481     .class_name = "H.263 encoder",
4482     .item_name  = av_default_item_name,
4483     .option     = h263_options,
4484     .version    = LIBAVUTIL_VERSION_INT,
4485 };
4486
4487 AVCodec ff_h263_encoder = {
4488     .name           = "h263",
4489     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4490     .type           = AVMEDIA_TYPE_VIDEO,
4491     .id             = AV_CODEC_ID_H263,
4492     .priv_data_size = sizeof(MpegEncContext),
4493     .init           = ff_mpv_encode_init,
4494     .encode2        = ff_mpv_encode_picture,
4495     .close          = ff_mpv_encode_end,
4496     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4497     .priv_class     = &h263_class,
4498 };
4499
4500 static const AVOption h263p_options[] = {
4501     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4502     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4503     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4504     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4505     FF_MPV_COMMON_OPTS
4506     { NULL },
4507 };
4508 static const AVClass h263p_class = {
4509     .class_name = "H.263p encoder",
4510     .item_name  = av_default_item_name,
4511     .option     = h263p_options,
4512     .version    = LIBAVUTIL_VERSION_INT,
4513 };
4514
4515 AVCodec ff_h263p_encoder = {
4516     .name           = "h263p",
4517     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4518     .type           = AVMEDIA_TYPE_VIDEO,
4519     .id             = AV_CODEC_ID_H263P,
4520     .priv_data_size = sizeof(MpegEncContext),
4521     .init           = ff_mpv_encode_init,
4522     .encode2        = ff_mpv_encode_picture,
4523     .close          = ff_mpv_encode_end,
4524     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
4525     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4526     .priv_class     = &h263p_class,
4527 };
4528
4529 static const AVClass msmpeg4v2_class = {
4530     .class_name = "msmpeg4v2 encoder",
4531     .item_name  = av_default_item_name,
4532     .option     = ff_mpv_generic_options,
4533     .version    = LIBAVUTIL_VERSION_INT,
4534 };
4535
4536 AVCodec ff_msmpeg4v2_encoder = {
4537     .name           = "msmpeg4v2",
4538     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4539     .type           = AVMEDIA_TYPE_VIDEO,
4540     .id             = AV_CODEC_ID_MSMPEG4V2,
4541     .priv_data_size = sizeof(MpegEncContext),
4542     .init           = ff_mpv_encode_init,
4543     .encode2        = ff_mpv_encode_picture,
4544     .close          = ff_mpv_encode_end,
4545     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4546     .priv_class     = &msmpeg4v2_class,
4547 };
4548
4549 static const AVClass msmpeg4v3_class = {
4550     .class_name = "msmpeg4v3 encoder",
4551     .item_name  = av_default_item_name,
4552     .option     = ff_mpv_generic_options,
4553     .version    = LIBAVUTIL_VERSION_INT,
4554 };
4555
4556 AVCodec ff_msmpeg4v3_encoder = {
4557     .name           = "msmpeg4",
4558     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4559     .type           = AVMEDIA_TYPE_VIDEO,
4560     .id             = AV_CODEC_ID_MSMPEG4V3,
4561     .priv_data_size = sizeof(MpegEncContext),
4562     .init           = ff_mpv_encode_init,
4563     .encode2        = ff_mpv_encode_picture,
4564     .close          = ff_mpv_encode_end,
4565     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4566     .priv_class     = &msmpeg4v3_class,
4567 };
4568
4569 static const AVClass wmv1_class = {
4570     .class_name = "wmv1 encoder",
4571     .item_name  = av_default_item_name,
4572     .option     = ff_mpv_generic_options,
4573     .version    = LIBAVUTIL_VERSION_INT,
4574 };
4575
4576 AVCodec ff_wmv1_encoder = {
4577     .name           = "wmv1",
4578     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4579     .type           = AVMEDIA_TYPE_VIDEO,
4580     .id             = AV_CODEC_ID_WMV1,
4581     .priv_data_size = sizeof(MpegEncContext),
4582     .init           = ff_mpv_encode_init,
4583     .encode2        = ff_mpv_encode_picture,
4584     .close          = ff_mpv_encode_end,
4585     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4586     .priv_class     = &wmv1_class,
4587 };