git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of Libav.
   9  *
  10  * Libav is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * Libav is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with Libav; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "idctdsp.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "mpegvideodata.h"
  44 #include "h261.h"
  45 #include "h263.h"
  46 #include "h263data.h"
  47 #include "mjpegenc_common.h"
  48 #include "mathops.h"
  49 #include "mpegutils.h"
  50 #include "mjpegenc.h"
  51 #include "msmpeg4.h"
  52 #include "pixblockdsp.h"
  53 #include "qpeldsp.h"
  54 #include "faandct.h"
  55 #include "thread.h"
  56 #include "aandcttab.h"
  57 #include "flv.h"
  58 #include "mpeg4video.h"
  59 #include "internal.h"
  60 #include "bytestream.h"
  61 #include "wmv2.h"
  62 #include "rv10.h"
  63 #include <limits.h>
  64
  65 #define QUANT_BIAS_SHIFT 8
  66
  67 #define QMAT_SHIFT_MMX 16
  68 #define QMAT_SHIFT 22
  69
  70 static int encode_picture(MpegEncContext *s, int picture_number);
  71 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
  72 static int sse_mb(MpegEncContext *s);
  73 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
  74 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
  75
  76 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
  77 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
  78
  79 const AVOption ff_mpv_generic_options[] = {
  80     FF_MPV_COMMON_OPTS
  81     { NULL },
  82 };
  83
  84 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
  85                        uint16_t (*qmat16)[2][64],
  86                        const uint16_t *quant_matrix,
  87                        int bias, int qmin, int qmax, int intra)
  88 {
  89     FDCTDSPContext *fdsp = &s->fdsp;
  90     int qscale;
  91     int shift = 0;
  92
  93     for (qscale = qmin; qscale <= qmax; qscale++) {
  94         int i;
  95         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
  96 #if CONFIG_FAANDCT
  97             fdsp->fdct == ff_faandct            ||
  98 #endif /* CONFIG_FAANDCT */
  99             fdsp->fdct == ff_jpeg_fdct_islow_10) {
 100             for (i = 0; i < 64; i++) {
 101                 const int j = s->idsp.idct_permutation[i];
 102                 int64_t den = (int64_t) qscale * quant_matrix[j];
 103                 /* 16 <= qscale * quant_matrix[i] <= 7905
 104                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 105                  *             19952 <=              x  <= 249205026
 106                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 107                  *           3444240 >= (1 << 36) / (x) >= 275 */
 108
 109                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 110             }
 111         } else if (fdsp->fdct == ff_fdct_ifast) {
 112             for (i = 0; i < 64; i++) {
 113                 const int j = s->idsp.idct_permutation[i];
 114                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
 115                 /* 16 <= qscale * quant_matrix[i] <= 7905
 116                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 117                  *             19952 <=              x  <= 249205026
 118                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 119                  *           3444240 >= (1 << 36) / (x) >= 275 */
 120
 121                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
 122             }
 123         } else {
 124             for (i = 0; i < 64; i++) {
 125                 const int j = s->idsp.idct_permutation[i];
 126                 int64_t den = (int64_t) qscale * quant_matrix[j];
 127                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 128                  * Assume x = qscale * quant_matrix[i]
 129                  * So             16 <=              x  <= 7905
 130                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 131                  * so          32768 >= (1 << 19) / (x) >= 67 */
 132                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 133                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 134                 //                    (qscale * quant_matrix[i]);
 135                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
 136
 137                 if (qmat16[qscale][0][i] == 0 ||
 138                     qmat16[qscale][0][i] == 128 * 256)
 139                     qmat16[qscale][0][i] = 128 * 256 - 1;
 140                 qmat16[qscale][1][i] =
 141                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 142                                 qmat16[qscale][0][i]);
 143             }
 144         }
 145
 146         for (i = intra; i < 64; i++) {
 147             int64_t max = 8191;
 148             if (fdsp->fdct == ff_fdct_ifast) {
 149                 max = (8191LL * ff_aanscales[i]) >> 14;
 150             }
 151             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 152                 shift++;
 153             }
 154         }
 155     }
 156     if (shift) {
 157         av_log(NULL, AV_LOG_INFO,
 158                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 159                QMAT_SHIFT - shift);
 160     }
 161 }
 162
 163 static inline void update_qscale(MpegEncContext *s)
 164 {
 165     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 166                 (FF_LAMBDA_SHIFT + 7);
 167     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 168
 169     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 170                  FF_LAMBDA_SHIFT;
 171 }
 172
 173 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 174 {
 175     int i;
 176
 177     if (matrix) {
 178         put_bits(pb, 1, 1);
 179         for (i = 0; i < 64; i++) {
 180             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 181         }
 182     } else
 183         put_bits(pb, 1, 0);
 184 }
 185
 186 /**
 187  * init s->current_picture.qscale_table from s->lambda_table
 188  */
 189 void ff_init_qscale_tab(MpegEncContext *s)
 190 {
 191     int8_t * const qscale_table = s->current_picture.qscale_table;
 192     int i;
 193
 194     for (i = 0; i < s->mb_num; i++) {
 195         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 196         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 197         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 198                                                   s->avctx->qmax);
 199     }
 200 }
 201
 202 static void update_duplicate_context_after_me(MpegEncContext *dst,
 203                                               MpegEncContext *src)
 204 {
 205 #define COPY(a) dst->a= src->a
 206     COPY(pict_type);
 207     COPY(current_picture);
 208     COPY(f_code);
 209     COPY(b_code);
 210     COPY(qscale);
 211     COPY(lambda);
 212     COPY(lambda2);
 213     COPY(picture_in_gop_number);
 214     COPY(gop_picture_number);
 215     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 216     COPY(progressive_frame);    // FIXME don't set in encode_header
 217     COPY(partitioned_frame);    // FIXME don't set in encode_header
 218 #undef COPY
 219 }
 220
 221 /**
 222  * Set the given MpegEncContext to defaults for encoding.
 223  * the changed fields will not depend upon the prior state of the MpegEncContext.
 224  */
 225 static void mpv_encode_defaults(MpegEncContext *s)
 226 {
 227     int i;
 228     ff_mpv_common_defaults(s);
 229
 230     for (i = -16; i < 16; i++) {
 231         default_fcode_tab[i + MAX_MV] = 1;
 232     }
 233     s->me.mv_penalty = default_mv_penalty;
 234     s->fcode_tab     = default_fcode_tab;
 235
 236     s->input_picture_number  = 0;
 237     s->picture_in_gop_number = 0;
 238 }
 239
 240 /* init video encoder */
 241 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
 242 {
 243     MpegEncContext *s = avctx->priv_data;
 244     AVCPBProperties *cpb_props;
 245     int i, ret, format_supported;
 246
 247     mpv_encode_defaults(s);
 248
 249     switch (avctx->codec_id) {
 250     case AV_CODEC_ID_MPEG2VIDEO:
 251         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 252             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 253             av_log(avctx, AV_LOG_ERROR,
 254                    "only YUV420 and YUV422 are supported\n");
 255             return -1;
 256         }
 257         break;
 258     case AV_CODEC_ID_MJPEG:
 259         format_supported = 0;
 260         /* JPEG color space */
 261         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
 262             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
 263             (avctx->color_range == AVCOL_RANGE_JPEG &&
 264              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 265               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
 266             format_supported = 1;
 267         /* MPEG color space */
 268         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
 269                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 270                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
 271             format_supported = 1;
 272
 273         if (!format_supported) {
 274             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 275             return -1;
 276         }
 277         break;
 278     default:
 279         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 280             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 281             return -1;
 282         }
 283     }
 284
 285     switch (avctx->pix_fmt) {
 286     case AV_PIX_FMT_YUVJ422P:
 287     case AV_PIX_FMT_YUV422P:
 288         s->chroma_format = CHROMA_422;
 289         break;
 290     case AV_PIX_FMT_YUVJ420P:
 291     case AV_PIX_FMT_YUV420P:
 292     default:
 293         s->chroma_format = CHROMA_420;
 294         break;
 295     }
 296
 297 #if FF_API_PRIVATE_OPT
 298 FF_DISABLE_DEPRECATION_WARNINGS
 299     if (avctx->rtp_payload_size)
 300         s->rtp_payload_size = avctx->rtp_payload_size;
 301     if (avctx->me_penalty_compensation)
 302         s->me_penalty_compensation = avctx->me_penalty_compensation;
 303 FF_ENABLE_DEPRECATION_WARNINGS
 304 #endif
 305
 306     s->bit_rate = avctx->bit_rate;
 307     s->width    = avctx->width;
 308     s->height   = avctx->height;
 309     if (avctx->gop_size > 600 &&
 310         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 311         av_log(avctx, AV_LOG_ERROR,
 312                "Warning keyframe interval too large! reducing it ...\n");
 313         avctx->gop_size = 600;
 314     }
 315     s->gop_size     = avctx->gop_size;
 316     s->avctx        = avctx;
 317     if (avctx->max_b_frames > MAX_B_FRAMES) {
 318         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 319                "is %d.\n", MAX_B_FRAMES);
 320     }
 321     s->max_b_frames = avctx->max_b_frames;
 322     s->codec_id     = avctx->codec->id;
 323     s->strict_std_compliance = avctx->strict_std_compliance;
 324     s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
 325     s->rtp_mode           = !!s->rtp_payload_size;
 326     s->intra_dc_precision = avctx->intra_dc_precision;
 327     s->user_specified_pts = AV_NOPTS_VALUE;
 328
 329     if (s->gop_size <= 1) {
 330         s->intra_only = 1;
 331         s->gop_size   = 12;
 332     } else {
 333         s->intra_only = 0;
 334     }
 335
 336 #if FF_API_MOTION_EST
 337 FF_DISABLE_DEPRECATION_WARNINGS
 338     s->me_method = avctx->me_method;
 339 FF_ENABLE_DEPRECATION_WARNINGS
 340 #endif
 341
 342     /* Fixed QSCALE */
 343     s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
 344
 345 #if FF_API_MPV_OPT
 346     FF_DISABLE_DEPRECATION_WARNINGS
 347     if (avctx->border_masking != 0.0)
 348         s->border_masking = avctx->border_masking;
 349     FF_ENABLE_DEPRECATION_WARNINGS
 350 #endif
 351
 352     s->adaptive_quant = (s->avctx->lumi_masking ||
 353                          s->avctx->dark_masking ||
 354                          s->avctx->temporal_cplx_masking ||
 355                          s->avctx->spatial_cplx_masking  ||
 356                          s->avctx->p_masking      ||
 357                          s->border_masking ||
 358                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 359                         !s->fixed_qscale;
 360
 361     s->loop_filter = !!(s->avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
 362
 363     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 364         av_log(avctx, AV_LOG_ERROR,
 365                "a vbv buffer size is needed, "
 366                "for encoding with a maximum bitrate\n");
 367         return -1;
 368     }
 369
 370     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 371         av_log(avctx, AV_LOG_INFO,
 372                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 373     }
 374
 375     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 376         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 377         return -1;
 378     }
 379
 380     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 381         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
 382         return -1;
 383     }
 384
 385     if (avctx->rc_max_rate &&
 386         avctx->rc_max_rate == avctx->bit_rate &&
 387         avctx->rc_max_rate != avctx->rc_min_rate) {
 388         av_log(avctx, AV_LOG_INFO,
 389                "impossible bitrate constraints, this will fail\n");
 390     }
 391
 392     if (avctx->rc_buffer_size &&
 393         avctx->bit_rate * (int64_t)avctx->time_base.num >
 394             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 395         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 396         return -1;
 397     }
 398
 399     if (!s->fixed_qscale &&
 400         avctx->bit_rate * av_q2d(avctx->time_base) >
 401             avctx->bit_rate_tolerance) {
 402         av_log(avctx, AV_LOG_ERROR,
 403                "bitrate tolerance too small for bitrate\n");
 404         return -1;
 405     }
 406
 407     if (s->avctx->rc_max_rate &&
 408         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 409         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 410          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 411         90000LL * (avctx->rc_buffer_size - 1) >
 412             s->avctx->rc_max_rate * 0xFFFFLL) {
 413         av_log(avctx, AV_LOG_INFO,
 414                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 415                "specified vbv buffer is too large for the given bitrate!\n");
 416     }
 417
 418     if ((s->avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
 419         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 420         s->codec_id != AV_CODEC_ID_FLV1) {
 421         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 422         return -1;
 423     }
 424
 425     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 426         av_log(avctx, AV_LOG_ERROR,
 427                "OBMC is only supported with simple mb decision\n");
 428         return -1;
 429     }
 430
 431     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 432         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 433         return -1;
 434     }
 435
 436     if (s->max_b_frames                    &&
 437         s->codec_id != AV_CODEC_ID_MPEG4      &&
 438         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 439         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 440         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 441         return -1;
 442     }
 443
 444     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 445          s->codec_id == AV_CODEC_ID_H263  ||
 446          s->codec_id == AV_CODEC_ID_H263P) &&
 447         (avctx->sample_aspect_ratio.num > 255 ||
 448          avctx->sample_aspect_ratio.den > 255)) {
 449         av_log(avctx, AV_LOG_ERROR,
 450                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
 451                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 452         return -1;
 453     }
 454
 455     if ((s->avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
 456         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 457         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 458         return -1;
 459     }
 460
 461 #if FF_API_PRIVATE_OPT
 462     FF_DISABLE_DEPRECATION_WARNINGS
 463     if (avctx->mpeg_quant)
 464         s->mpeg_quant = avctx->mpeg_quant;
 465     FF_ENABLE_DEPRECATION_WARNINGS
 466 #endif
 467
 468     // FIXME mpeg2 uses that too
 469     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
 470         av_log(avctx, AV_LOG_ERROR,
 471                "mpeg2 style quantization not supported by codec\n");
 472         return -1;
 473     }
 474
 475     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 476         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 477         return -1;
 478     }
 479
 480     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 481         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 482         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 483         return -1;
 484     }
 485
 486 #if FF_API_PRIVATE_OPT
 487 FF_DISABLE_DEPRECATION_WARNINGS
 488     if (avctx->scenechange_threshold)
 489         s->scenechange_threshold = avctx->scenechange_threshold;
 490 FF_ENABLE_DEPRECATION_WARNINGS
 491 #endif
 492
 493     if (s->scenechange_threshold < 1000000000 &&
 494         (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
 495         av_log(avctx, AV_LOG_ERROR,
 496                "closed gop with scene change detection are not supported yet, "
 497                "set threshold to 1000000000\n");
 498         return -1;
 499     }
 500
 501     if (s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
 502         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 503             av_log(avctx, AV_LOG_ERROR,
 504                   "low delay forcing is only available for mpeg2\n");
 505             return -1;
 506         }
 507         if (s->max_b_frames != 0) {
 508             av_log(avctx, AV_LOG_ERROR,
 509                    "b frames cannot be used with low delay\n");
 510             return -1;
 511         }
 512     }
 513
 514     if (s->q_scale_type == 1) {
 515         if (avctx->qmax > 12) {
 516             av_log(avctx, AV_LOG_ERROR,
 517                    "non linear quant only supports qmax <= 12 currently\n");
 518             return -1;
 519         }
 520     }
 521
 522     if (avctx->slices > 1 &&
 523         (avctx->codec_id == AV_CODEC_ID_FLV1 || avctx->codec_id == AV_CODEC_ID_H261)) {
 524         av_log(avctx, AV_LOG_ERROR, "Multiple slices are not supported by this codec\n");
 525         return AVERROR(EINVAL);
 526     }
 527
 528     if (s->avctx->thread_count > 1         &&
 529         s->codec_id != AV_CODEC_ID_MPEG4      &&
 530         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 531         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 532         (s->codec_id != AV_CODEC_ID_H263P)) {
 533         av_log(avctx, AV_LOG_ERROR,
 534                "multi threaded encoding not supported by codec\n");
 535         return -1;
 536     }
 537
 538     if (s->avctx->thread_count < 1) {
 539         av_log(avctx, AV_LOG_ERROR,
 540                "automatic thread number detection not supported by codec,"
 541                "patch welcome\n");
 542         return -1;
 543     }
 544
 545     if (!avctx->time_base.den || !avctx->time_base.num) {
 546         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 547         return -1;
 548     }
 549
 550 #if FF_API_PRIVATE_OPT
 551 FF_DISABLE_DEPRECATION_WARNINGS
 552     if (avctx->b_frame_strategy)
 553         s->b_frame_strategy = avctx->b_frame_strategy;
 554     if (avctx->b_sensitivity != 40)
 555         s->b_sensitivity = avctx->b_sensitivity;
 556 FF_ENABLE_DEPRECATION_WARNINGS
 557 #endif
 558
 559     if (s->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
 560         av_log(avctx, AV_LOG_INFO,
 561                "notice: b_frame_strategy only affects the first pass\n");
 562         s->b_frame_strategy = 0;
 563     }
 564
 565     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 566     if (i > 1) {
 567         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 568         avctx->time_base.den /= i;
 569         avctx->time_base.num /= i;
 570         //return -1;
 571     }
 572
 573     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 574         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
 575         // (a + x * 3 / 8) / x
 576         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 577         s->inter_quant_bias = 0;
 578     } else {
 579         s->intra_quant_bias = 0;
 580         // (a - x / 4) / x
 581         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 582     }
 583
 584 #if FF_API_QUANT_BIAS
 585 FF_DISABLE_DEPRECATION_WARNINGS
 586     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 587         s->intra_quant_bias = avctx->intra_quant_bias;
 588     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 589         s->inter_quant_bias = avctx->inter_quant_bias;
 590 FF_ENABLE_DEPRECATION_WARNINGS
 591 #endif
 592
 593     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 594         s->avctx->time_base.den > (1 << 16) - 1) {
 595         av_log(avctx, AV_LOG_ERROR,
 596                "timebase %d/%d not supported by MPEG 4 standard, "
 597                "the maximum admitted value for the timebase denominator "
 598                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 599                (1 << 16) - 1);
 600         return -1;
 601     }
 602     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 603
 604     switch (avctx->codec->id) {
 605     case AV_CODEC_ID_MPEG1VIDEO:
 606         s->out_format = FMT_MPEG1;
 607         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
 608         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 609         break;
 610     case AV_CODEC_ID_MPEG2VIDEO:
 611         s->out_format = FMT_MPEG1;
 612         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
 613         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 614         s->rtp_mode   = 1;
 615         break;
 616     case AV_CODEC_ID_MJPEG:
 617         s->out_format = FMT_MJPEG;
 618         s->intra_only = 1; /* force intra only for jpeg */
 619         if (!CONFIG_MJPEG_ENCODER ||
 620             ff_mjpeg_encode_init(s) < 0)
 621             return -1;
 622         avctx->delay = 0;
 623         s->low_delay = 1;
 624         break;
 625     case AV_CODEC_ID_H261:
 626         if (!CONFIG_H261_ENCODER)
 627             return -1;
 628         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 629             av_log(avctx, AV_LOG_ERROR,
 630                    "The specified picture size of %dx%d is not valid for the "
 631                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 632                     s->width, s->height);
 633             return -1;
 634         }
 635         s->out_format = FMT_H261;
 636         avctx->delay  = 0;
 637         s->low_delay  = 1;
 638         s->rtp_mode   = 0; /* Sliced encoding not supported */
 639         break;
 640     case AV_CODEC_ID_H263:
 641         if (!CONFIG_H263_ENCODER)
 642         return -1;
 643         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 644                              s->width, s->height) == 8) {
 645             av_log(avctx, AV_LOG_INFO,
 646                    "The specified picture size of %dx%d is not valid for "
 647                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 648                    "352x288, 704x576, and 1408x1152."
 649                    "Try H.263+.\n", s->width, s->height);
 650             return -1;
 651         }
 652         s->out_format = FMT_H263;
 653         avctx->delay  = 0;
 654         s->low_delay  = 1;
 655         break;
 656     case AV_CODEC_ID_H263P:
 657         s->out_format = FMT_H263;
 658         s->h263_plus  = 1;
 659         /* Fx */
 660         s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
 661         s->modified_quant  = s->h263_aic;
 662         s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 663         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 664
 665         /* /Fx */
 666         /* These are just to be sure */
 667         avctx->delay = 0;
 668         s->low_delay = 1;
 669         break;
 670     case AV_CODEC_ID_FLV1:
 671         s->out_format      = FMT_H263;
 672         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 673         s->unrestricted_mv = 1;
 674         s->rtp_mode  = 0; /* don't allow GOB */
 675         avctx->delay = 0;
 676         s->low_delay = 1;
 677         break;
 678     case AV_CODEC_ID_RV10:
 679         s->out_format = FMT_H263;
 680         avctx->delay  = 0;
 681         s->low_delay  = 1;
 682         break;
 683     case AV_CODEC_ID_RV20:
 684         s->out_format      = FMT_H263;
 685         avctx->delay       = 0;
 686         s->low_delay       = 1;
 687         s->modified_quant  = 1;
 688         s->h263_aic        = 1;
 689         s->h263_plus       = 1;
 690         s->loop_filter     = 1;
 691         s->unrestricted_mv = 0;
 692         break;
 693     case AV_CODEC_ID_MPEG4:
 694         s->out_format      = FMT_H263;
 695         s->h263_pred       = 1;
 696         s->unrestricted_mv = 1;
 697         s->low_delay       = s->max_b_frames ? 0 : 1;
 698         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 699         break;
 700     case AV_CODEC_ID_MSMPEG4V2:
 701         s->out_format      = FMT_H263;
 702         s->h263_pred       = 1;
 703         s->unrestricted_mv = 1;
 704         s->msmpeg4_version = 2;
 705         avctx->delay       = 0;
 706         s->low_delay       = 1;
 707         break;
 708     case AV_CODEC_ID_MSMPEG4V3:
 709         s->out_format        = FMT_H263;
 710         s->h263_pred         = 1;
 711         s->unrestricted_mv   = 1;
 712         s->msmpeg4_version   = 3;
 713         s->flipflop_rounding = 1;
 714         avctx->delay         = 0;
 715         s->low_delay         = 1;
 716         break;
 717     case AV_CODEC_ID_WMV1:
 718         s->out_format        = FMT_H263;
 719         s->h263_pred         = 1;
 720         s->unrestricted_mv   = 1;
 721         s->msmpeg4_version   = 4;
 722         s->flipflop_rounding = 1;
 723         avctx->delay         = 0;
 724         s->low_delay         = 1;
 725         break;
 726     case AV_CODEC_ID_WMV2:
 727         s->out_format        = FMT_H263;
 728         s->h263_pred         = 1;
 729         s->unrestricted_mv   = 1;
 730         s->msmpeg4_version   = 5;
 731         s->flipflop_rounding = 1;
 732         avctx->delay         = 0;
 733         s->low_delay         = 1;
 734         break;
 735     default:
 736         return -1;
 737     }
 738
 739 #if FF_API_PRIVATE_OPT
 740     FF_DISABLE_DEPRECATION_WARNINGS
 741     if (avctx->noise_reduction)
 742         s->noise_reduction = avctx->noise_reduction;
 743     FF_ENABLE_DEPRECATION_WARNINGS
 744 #endif
 745
 746     avctx->has_b_frames = !s->low_delay;
 747
 748     s->encoding = 1;
 749
 750     s->progressive_frame    =
 751     s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
 752                                                 AV_CODEC_FLAG_INTERLACED_ME) ||
 753                                 s->alternate_scan);
 754
 755     /* init */
 756     ff_mpv_idct_init(s);
 757     if (ff_mpv_common_init(s) < 0)
 758         return -1;
 759
 760     if (ARCH_X86)
 761         ff_mpv_encode_init_x86(s);
 762
 763     ff_fdctdsp_init(&s->fdsp, avctx);
 764     ff_me_cmp_init(&s->mecc, avctx);
 765     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 766     ff_pixblockdsp_init(&s->pdsp, avctx);
 767     ff_qpeldsp_init(&s->qdsp);
 768
 769     if (s->msmpeg4_version) {
 770         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 771                           2 * 2 * (MAX_LEVEL + 1) *
 772                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 773     }
 774     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 775
 776     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 777     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 778     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 779     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 780     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 781                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 782     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 783                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 784
 785
 786     if (s->noise_reduction) {
 787         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 788                           2 * 64 * sizeof(uint16_t), fail);
 789     }
 790
 791     if (CONFIG_H263_ENCODER)
 792         ff_h263dsp_init(&s->h263dsp);
 793     if (!s->dct_quantize)
 794         s->dct_quantize = ff_dct_quantize_c;
 795     if (!s->denoise_dct)
 796         s->denoise_dct  = denoise_dct_c;
 797     s->fast_dct_quantize = s->dct_quantize;
 798     if (avctx->trellis)
 799         s->dct_quantize  = dct_quantize_trellis_c;
 800
 801     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 802         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 803
 804     if (s->slice_context_count > 1) {
 805         s->rtp_mode = 1;
 806
 807         if (avctx->codec_id == AV_CODEC_ID_H263 || avctx->codec_id == AV_CODEC_ID_H263P)
 808             s->h263_slice_structured = 1;
 809     }
 810
 811     s->quant_precision = 5;
 812
 813 #if FF_API_PRIVATE_OPT
 814 FF_DISABLE_DEPRECATION_WARNINGS
 815     if (avctx->frame_skip_threshold)
 816         s->frame_skip_threshold = avctx->frame_skip_threshold;
 817     if (avctx->frame_skip_factor)
 818         s->frame_skip_factor = avctx->frame_skip_factor;
 819     if (avctx->frame_skip_exp)
 820         s->frame_skip_exp = avctx->frame_skip_exp;
 821     if (avctx->frame_skip_cmp != FF_CMP_DCTMAX)
 822         s->frame_skip_cmp = avctx->frame_skip_cmp;
 823 FF_ENABLE_DEPRECATION_WARNINGS
 824 #endif
 825
 826     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
 827     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->frame_skip_cmp);
 828
 829     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 830         ff_h261_encode_init(s);
 831     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 832         ff_h263_encode_init(s);
 833     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 834         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
 835             return ret;
 836     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 837         && s->out_format == FMT_MPEG1)
 838         ff_mpeg1_encode_init(s);
 839
 840     /* init q matrix */
 841     for (i = 0; i < 64; i++) {
 842         int j = s->idsp.idct_permutation[i];
 843         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 844             s->mpeg_quant) {
 845             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 846             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 847         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 848             s->intra_matrix[j] =
 849             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 850         } else {
 851             /* mpeg1/2 */
 852             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 853             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 854         }
 855         if (s->avctx->intra_matrix)
 856             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 857         if (s->avctx->inter_matrix)
 858             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 859     }
 860
 861     /* precompute matrix */
 862     /* for mjpeg, we do include qscale in the matrix */
 863     if (s->out_format != FMT_MJPEG) {
 864         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
 865                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 866                           31, 1);
 867         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
 868                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 869                           31, 0);
 870     }
 871
 872     if (ff_rate_control_init(s) < 0)
 873         return -1;
 874
 875 #if FF_API_ERROR_RATE
 876     FF_DISABLE_DEPRECATION_WARNINGS
 877     if (avctx->error_rate)
 878         s->error_rate = avctx->error_rate;
 879     FF_ENABLE_DEPRECATION_WARNINGS;
 880 #endif
 881
 882 #if FF_API_NORMALIZE_AQP
 883     FF_DISABLE_DEPRECATION_WARNINGS
 884     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 885         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 886     FF_ENABLE_DEPRECATION_WARNINGS;
 887 #endif
 888
 889 #if FF_API_MV0
 890     FF_DISABLE_DEPRECATION_WARNINGS
 891     if (avctx->flags & CODEC_FLAG_MV0)
 892         s->mpv_flags |= FF_MPV_FLAG_MV0;
 893     FF_ENABLE_DEPRECATION_WARNINGS
 894 #endif
 895
 896 #if FF_API_MPV_OPT
 897     FF_DISABLE_DEPRECATION_WARNINGS
 898     if (avctx->rc_qsquish != 0.0)
 899         s->rc_qsquish = avctx->rc_qsquish;
 900     if (avctx->rc_qmod_amp != 0.0)
 901         s->rc_qmod_amp = avctx->rc_qmod_amp;
 902     if (avctx->rc_qmod_freq)
 903         s->rc_qmod_freq = avctx->rc_qmod_freq;
 904     if (avctx->rc_buffer_aggressivity != 1.0)
 905         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
 906     if (avctx->rc_initial_cplx != 0.0)
 907         s->rc_initial_cplx = avctx->rc_initial_cplx;
 908     if (avctx->lmin)
 909         s->lmin = avctx->lmin;
 910     if (avctx->lmax)
 911         s->lmax = avctx->lmax;
 912
 913     if (avctx->rc_eq) {
 914         av_freep(&s->rc_eq);
 915         s->rc_eq = av_strdup(avctx->rc_eq);
 916         if (!s->rc_eq)
 917             return AVERROR(ENOMEM);
 918     }
 919     FF_ENABLE_DEPRECATION_WARNINGS
 920 #endif
 921
 922 #if FF_API_PRIVATE_OPT
 923     FF_DISABLE_DEPRECATION_WARNINGS
 924     if (avctx->brd_scale)
 925         s->brd_scale = avctx->brd_scale;
 926
 927     if (avctx->prediction_method)
 928         s->pred = avctx->prediction_method + 1;
 929     FF_ENABLE_DEPRECATION_WARNINGS
 930 #endif
 931
 932     if (s->b_frame_strategy == 2) {
 933         for (i = 0; i < s->max_b_frames + 2; i++) {
 934             s->tmp_frames[i] = av_frame_alloc();
 935             if (!s->tmp_frames[i])
 936                 return AVERROR(ENOMEM);
 937
 938             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 939             s->tmp_frames[i]->width  = s->width  >> s->brd_scale;
 940             s->tmp_frames[i]->height = s->height >> s->brd_scale;
 941
 942             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 943             if (ret < 0)
 944                 return ret;
 945         }
 946     }
 947
 948     cpb_props = ff_add_cpb_side_data(avctx);
 949     if (!cpb_props)
 950         return AVERROR(ENOMEM);
 951     cpb_props->max_bitrate = avctx->rc_max_rate;
 952     cpb_props->min_bitrate = avctx->rc_min_rate;
 953     cpb_props->avg_bitrate = avctx->bit_rate;
 954     cpb_props->buffer_size = avctx->rc_buffer_size;
 955
 956     return 0;
 957 fail:
 958     ff_mpv_encode_end(avctx);
 959     return AVERROR_UNKNOWN;
 960 }
 961
 962 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
 963 {
 964     MpegEncContext *s = avctx->priv_data;
 965     int i;
 966
 967     ff_rate_control_uninit(s);
 968
 969     ff_mpv_common_end(s);
 970     if (CONFIG_MJPEG_ENCODER &&
 971         s->out_format == FMT_MJPEG)
 972         ff_mjpeg_encode_close(s);
 973
 974     av_freep(&avctx->extradata);
 975
 976     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
 977         av_frame_free(&s->tmp_frames[i]);
 978
 979     ff_free_picture_tables(&s->new_picture);
 980     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
 981
 982     av_freep(&s->avctx->stats_out);
 983     av_freep(&s->ac_stats);
 984
 985     av_freep(&s->q_intra_matrix);
 986     av_freep(&s->q_inter_matrix);
 987     av_freep(&s->q_intra_matrix16);
 988     av_freep(&s->q_inter_matrix16);
 989     av_freep(&s->input_picture);
 990     av_freep(&s->reordered_input_picture);
 991     av_freep(&s->dct_offset);
 992
 993     return 0;
 994 }
 995
 996 static int get_sae(uint8_t *src, int ref, int stride)
 997 {
 998     int x,y;
 999     int acc = 0;
1000
1001     for (y = 0; y < 16; y++) {
1002         for (x = 0; x < 16; x++) {
1003             acc += FFABS(src[x + y * stride] - ref);
1004         }
1005     }
1006
1007     return acc;
1008 }
1009
1010 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1011                            uint8_t *ref, int stride)
1012 {
1013     int x, y, w, h;
1014     int acc = 0;
1015
1016     w = s->width  & ~15;
1017     h = s->height & ~15;
1018
1019     for (y = 0; y < h; y += 16) {
1020         for (x = 0; x < w; x += 16) {
1021             int offset = x + y * stride;
1022             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1023                                       stride, 16);
1024             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1025             int sae  = get_sae(src + offset, mean, stride);
1026
1027             acc += sae + 500 < sad;
1028         }
1029     }
1030     return acc;
1031 }
1032
1033 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1034 {
1035     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1036                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1037                             s->mb_stride, s->mb_height, s->b8_stride,
1038                             &s->linesize, &s->uvlinesize);
1039 }
1040
1041 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1042 {
1043     Picture *pic = NULL;
1044     int64_t pts;
1045     int i, display_picture_number = 0, ret;
1046     int encoding_delay = s->max_b_frames ? s->max_b_frames
1047                                          : (s->low_delay ? 0 : 1);
1048     int flush_offset = 1;
1049     int direct = 1;
1050
1051     if (pic_arg) {
1052         pts = pic_arg->pts;
1053         display_picture_number = s->input_picture_number++;
1054
1055         if (pts != AV_NOPTS_VALUE) {
1056             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1057                 int64_t time = pts;
1058                 int64_t last = s->user_specified_pts;
1059
1060                 if (time <= last) {
1061                     av_log(s->avctx, AV_LOG_ERROR,
1062                            "Error, Invalid timestamp=%"PRId64", "
1063                            "last=%"PRId64"\n", pts, s->user_specified_pts);
1064                     return -1;
1065                 }
1066
1067                 if (!s->low_delay && display_picture_number == 1)
1068                     s->dts_delta = time - last;
1069             }
1070             s->user_specified_pts = pts;
1071         } else {
1072             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1073                 s->user_specified_pts =
1074                 pts = s->user_specified_pts + 1;
1075                 av_log(s->avctx, AV_LOG_INFO,
1076                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1077                        pts);
1078             } else {
1079                 pts = display_picture_number;
1080             }
1081         }
1082
1083         if (!pic_arg->buf[0] ||
1084             pic_arg->linesize[0] != s->linesize ||
1085             pic_arg->linesize[1] != s->uvlinesize ||
1086             pic_arg->linesize[2] != s->uvlinesize)
1087             direct = 0;
1088         if ((s->width & 15) || (s->height & 15))
1089             direct = 0;
1090
1091         ff_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
1092                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1093
1094         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1095         if (i < 0)
1096             return i;
1097
1098         pic = &s->picture[i];
1099         pic->reference = 3;
1100
1101         if (direct) {
1102             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1103                 return ret;
1104         }
1105         ret = alloc_picture(s, pic, direct);
1106         if (ret < 0)
1107             return ret;
1108
1109         if (!direct) {
1110             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1111                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1112                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1113                 // empty
1114             } else {
1115                 int h_chroma_shift, v_chroma_shift;
1116                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1117                                                  &h_chroma_shift,
1118                                                  &v_chroma_shift);
1119
1120                 for (i = 0; i < 3; i++) {
1121                     int src_stride = pic_arg->linesize[i];
1122                     int dst_stride = i ? s->uvlinesize : s->linesize;
1123                     int h_shift = i ? h_chroma_shift : 0;
1124                     int v_shift = i ? v_chroma_shift : 0;
1125                     int w = s->width  >> h_shift;
1126                     int h = s->height >> v_shift;
1127                     uint8_t *src = pic_arg->data[i];
1128                     uint8_t *dst = pic->f->data[i];
1129
1130                     if (!s->avctx->rc_buffer_size)
1131                         dst += INPLACE_OFFSET;
1132
1133                     if (src_stride == dst_stride)
1134                         memcpy(dst, src, src_stride * h);
1135                     else {
1136                         int h2 = h;
1137                         uint8_t *dst2 = dst;
1138                         while (h2--) {
1139                             memcpy(dst2, src, w);
1140                             dst2 += dst_stride;
1141                             src += src_stride;
1142                         }
1143                     }
1144                     if ((s->width & 15) || (s->height & 15)) {
1145                         s->mpvencdsp.draw_edges(dst, dst_stride,
1146                                                 w, h,
1147                                                 16 >> h_shift,
1148                                                 16 >> v_shift,
1149                                                 EDGE_BOTTOM);
1150                     }
1151                 }
1152             }
1153         }
1154         ret = av_frame_copy_props(pic->f, pic_arg);
1155         if (ret < 0)
1156             return ret;
1157
1158         pic->f->display_picture_number = display_picture_number;
1159         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1160     } else {
1161         /* Flushing: When we have not received enough input frames,
1162          * ensure s->input_picture[0] contains the first picture */
1163         for (flush_offset = 0; flush_offset < encoding_delay + 1; flush_offset++)
1164             if (s->input_picture[flush_offset])
1165                 break;
1166
1167         if (flush_offset <= 1)
1168             flush_offset = 1;
1169         else
1170             encoding_delay = encoding_delay - flush_offset + 1;
1171     }
1172
1173     /* shift buffer entries */
1174     for (i = flush_offset; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1175         s->input_picture[i - flush_offset] = s->input_picture[i];
1176
1177     s->input_picture[encoding_delay] = (Picture*) pic;
1178
1179     return 0;
1180 }
1181
1182 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1183 {
1184     int x, y, plane;
1185     int score = 0;
1186     int64_t score64 = 0;
1187
1188     for (plane = 0; plane < 3; plane++) {
1189         const int stride = p->f->linesize[plane];
1190         const int bw = plane ? 1 : 2;
1191         for (y = 0; y < s->mb_height * bw; y++) {
1192             for (x = 0; x < s->mb_width * bw; x++) {
1193                 int off = p->shared ? 0 : 16;
1194                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1195                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1196                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1197
1198                 switch (s->frame_skip_exp) {
1199                 case 0: score    =  FFMAX(score, v);          break;
1200                 case 1: score   += FFABS(v);                  break;
1201                 case 2: score   += v * v;                     break;
1202                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1203                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1204                 }
1205             }
1206         }
1207     }
1208
1209     if (score)
1210         score64 = score;
1211
1212     if (score64 < s->frame_skip_threshold)
1213         return 1;
1214     if (score64 < ((s->frame_skip_factor * (int64_t) s->lambda) >> 8))
1215         return 1;
1216     return 0;
1217 }
1218
1219 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1220 {
1221     AVPacket pkt = { 0 };
1222     int ret, got_output;
1223
1224     av_init_packet(&pkt);
1225     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1226     if (ret < 0)
1227         return ret;
1228
1229     ret = pkt.size;
1230     av_packet_unref(&pkt);
1231     return ret;
1232 }
1233
1234 static int estimate_best_b_count(MpegEncContext *s)
1235 {
1236     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1237     AVCodecContext *c = avcodec_alloc_context3(NULL);
1238     const int scale = s->brd_scale;
1239     int i, j, out_size, p_lambda, b_lambda, lambda2;
1240     int64_t best_rd  = INT64_MAX;
1241     int best_b_count = -1;
1242
1243     if (!c)
1244         return AVERROR(ENOMEM);
1245     assert(scale >= 0 && scale <= 3);
1246
1247     //emms_c();
1248     //s->next_picture_ptr->quality;
1249     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1250     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1251     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1252     if (!b_lambda) // FIXME we should do this somewhere else
1253         b_lambda = p_lambda;
1254     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1255                FF_LAMBDA_SHIFT;
1256
1257     c->width        = s->width  >> scale;
1258     c->height       = s->height >> scale;
1259     c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1260     c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1261     c->mb_decision  = s->avctx->mb_decision;
1262     c->me_cmp       = s->avctx->me_cmp;
1263     c->mb_cmp       = s->avctx->mb_cmp;
1264     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1265     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1266     c->time_base    = s->avctx->time_base;
1267     c->max_b_frames = s->max_b_frames;
1268
1269     if (avcodec_open2(c, codec, NULL) < 0)
1270         return -1;
1271
1272     for (i = 0; i < s->max_b_frames + 2; i++) {
1273         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1274                                                 s->next_picture_ptr;
1275
1276         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1277             pre_input = *pre_input_ptr;
1278
1279             if (!pre_input.shared && i) {
1280                 pre_input.f->data[0] += INPLACE_OFFSET;
1281                 pre_input.f->data[1] += INPLACE_OFFSET;
1282                 pre_input.f->data[2] += INPLACE_OFFSET;
1283             }
1284
1285             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1286                                        s->tmp_frames[i]->linesize[0],
1287                                        pre_input.f->data[0],
1288                                        pre_input.f->linesize[0],
1289                                        c->width, c->height);
1290             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1291                                        s->tmp_frames[i]->linesize[1],
1292                                        pre_input.f->data[1],
1293                                        pre_input.f->linesize[1],
1294                                        c->width >> 1, c->height >> 1);
1295             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1296                                        s->tmp_frames[i]->linesize[2],
1297                                        pre_input.f->data[2],
1298                                        pre_input.f->linesize[2],
1299                                        c->width >> 1, c->height >> 1);
1300         }
1301     }
1302
1303     for (j = 0; j < s->max_b_frames + 1; j++) {
1304         int64_t rd = 0;
1305
1306         if (!s->input_picture[j])
1307             break;
1308
1309         c->error[0] = c->error[1] = c->error[2] = 0;
1310
1311         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1312         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1313
1314         out_size = encode_frame(c, s->tmp_frames[0]);
1315
1316         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1317
1318         for (i = 0; i < s->max_b_frames + 1; i++) {
1319             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1320
1321             s->tmp_frames[i + 1]->pict_type = is_p ?
1322                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1323             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1324
1325             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1326
1327             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1328         }
1329
1330         /* get the delayed frames */
1331         while (out_size) {
1332             out_size = encode_frame(c, NULL);
1333             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1334         }
1335
1336         rd += c->error[0] + c->error[1] + c->error[2];
1337
1338         if (rd < best_rd) {
1339             best_rd = rd;
1340             best_b_count = j;
1341         }
1342     }
1343
1344     avcodec_close(c);
1345     av_freep(&c);
1346
1347     return best_b_count;
1348 }
1349
1350 static int select_input_picture(MpegEncContext *s)
1351 {
1352     int i, ret;
1353
1354     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1355         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1356     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1357
1358     /* set next picture type & ordering */
1359     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1360         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1361             !s->next_picture_ptr || s->intra_only) {
1362             s->reordered_input_picture[0] = s->input_picture[0];
1363             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1364             s->reordered_input_picture[0]->f->coded_picture_number =
1365                 s->coded_picture_number++;
1366         } else {
1367             int b_frames = 0;
1368
1369             if (s->frame_skip_threshold || s->frame_skip_factor) {
1370                 if (s->picture_in_gop_number < s->gop_size &&
1371                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1372                     // FIXME check that te gop check above is +-1 correct
1373                     av_frame_unref(s->input_picture[0]->f);
1374
1375                     emms_c();
1376                     ff_vbv_update(s, 0);
1377
1378                     goto no_output_pic;
1379                 }
1380             }
1381
1382             if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1383                 for (i = 0; i < s->max_b_frames + 1; i++) {
1384                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1385
1386                     if (pict_num >= s->rc_context.num_entries)
1387                         break;
1388                     if (!s->input_picture[i]) {
1389                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1390                         break;
1391                     }
1392
1393                     s->input_picture[i]->f->pict_type =
1394                         s->rc_context.entry[pict_num].new_pict_type;
1395                 }
1396             }
1397
1398             if (s->b_frame_strategy == 0) {
1399                 b_frames = s->max_b_frames;
1400                 while (b_frames && !s->input_picture[b_frames])
1401                     b_frames--;
1402             } else if (s->b_frame_strategy == 1) {
1403                 for (i = 1; i < s->max_b_frames + 1; i++) {
1404                     if (s->input_picture[i] &&
1405                         s->input_picture[i]->b_frame_score == 0) {
1406                         s->input_picture[i]->b_frame_score =
1407                             get_intra_count(s,
1408                                             s->input_picture[i    ]->f->data[0],
1409                                             s->input_picture[i - 1]->f->data[0],
1410                                             s->linesize) + 1;
1411                     }
1412                 }
1413                 for (i = 0; i < s->max_b_frames + 1; i++) {
1414                     if (!s->input_picture[i] ||
1415                         s->input_picture[i]->b_frame_score - 1 >
1416                             s->mb_num / s->b_sensitivity)
1417                         break;
1418                 }
1419
1420                 b_frames = FFMAX(0, i - 1);
1421
1422                 /* reset scores */
1423                 for (i = 0; i < b_frames + 1; i++) {
1424                     s->input_picture[i]->b_frame_score = 0;
1425                 }
1426             } else if (s->b_frame_strategy == 2) {
1427                 b_frames = estimate_best_b_count(s);
1428             }
1429
1430             emms_c();
1431
1432             for (i = b_frames - 1; i >= 0; i--) {
1433                 int type = s->input_picture[i]->f->pict_type;
1434                 if (type && type != AV_PICTURE_TYPE_B)
1435                     b_frames = i;
1436             }
1437             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1438                 b_frames == s->max_b_frames) {
1439                 av_log(s->avctx, AV_LOG_ERROR,
1440                        "warning, too many b frames in a row\n");
1441             }
1442
1443             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1444                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1445                     s->gop_size > s->picture_in_gop_number) {
1446                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1447                 } else {
1448                     if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1449                         b_frames = 0;
1450                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1451                 }
1452             }
1453
1454             if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1455                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1456                 b_frames--;
1457
1458             s->reordered_input_picture[0] = s->input_picture[b_frames];
1459             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1460                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1461             s->reordered_input_picture[0]->f->coded_picture_number =
1462                 s->coded_picture_number++;
1463             for (i = 0; i < b_frames; i++) {
1464                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1465                 s->reordered_input_picture[i + 1]->f->pict_type =
1466                     AV_PICTURE_TYPE_B;
1467                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1468                     s->coded_picture_number++;
1469             }
1470         }
1471     }
1472 no_output_pic:
1473     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1474
1475     if (s->reordered_input_picture[0]) {
1476         s->reordered_input_picture[0]->reference =
1477            s->reordered_input_picture[0]->f->pict_type !=
1478                AV_PICTURE_TYPE_B ? 3 : 0;
1479
1480         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1481             return ret;
1482
1483         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1484             // input is a shared pix, so we can't modifiy it -> alloc a new
1485             // one & ensure that the shared one is reuseable
1486
1487             Picture *pic;
1488             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1489             if (i < 0)
1490                 return i;
1491             pic = &s->picture[i];
1492
1493             pic->reference = s->reordered_input_picture[0]->reference;
1494             if (alloc_picture(s, pic, 0) < 0) {
1495                 return -1;
1496             }
1497
1498             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1499             if (ret < 0)
1500                 return ret;
1501
1502             /* mark us unused / free shared pic */
1503             av_frame_unref(s->reordered_input_picture[0]->f);
1504             s->reordered_input_picture[0]->shared = 0;
1505
1506             s->current_picture_ptr = pic;
1507         } else {
1508             // input is not a shared pix -> reuse buffer for current_pix
1509             s->current_picture_ptr = s->reordered_input_picture[0];
1510             for (i = 0; i < 4; i++) {
1511                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1512             }
1513         }
1514         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1515         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1516                                        s->current_picture_ptr)) < 0)
1517             return ret;
1518
1519         s->picture_number = s->new_picture.f->display_picture_number;
1520     }
1521     return 0;
1522 }
1523
1524 static void frame_end(MpegEncContext *s)
1525 {
1526     int i;
1527
1528     if (s->unrestricted_mv &&
1529         s->current_picture.reference &&
1530         !s->intra_only) {
1531         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1532         int hshift = desc->log2_chroma_w;
1533         int vshift = desc->log2_chroma_h;
1534         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1535                                 s->h_edge_pos, s->v_edge_pos,
1536                                 EDGE_WIDTH, EDGE_WIDTH,
1537                                 EDGE_TOP | EDGE_BOTTOM);
1538         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1539                                 s->h_edge_pos >> hshift,
1540                                 s->v_edge_pos >> vshift,
1541                                 EDGE_WIDTH >> hshift,
1542                                 EDGE_WIDTH >> vshift,
1543                                 EDGE_TOP | EDGE_BOTTOM);
1544         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1545                                 s->h_edge_pos >> hshift,
1546                                 s->v_edge_pos >> vshift,
1547                                 EDGE_WIDTH >> hshift,
1548                                 EDGE_WIDTH >> vshift,
1549                                 EDGE_TOP | EDGE_BOTTOM);
1550     }
1551
1552     emms_c();
1553
1554     s->last_pict_type                 = s->pict_type;
1555     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1556     if (s->pict_type!= AV_PICTURE_TYPE_B)
1557         s->last_non_b_pict_type = s->pict_type;
1558
1559     if (s->encoding) {
1560         /* release non-reference frames */
1561         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1562             if (!s->picture[i].reference)
1563                 ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1564         }
1565     }
1566
1567 #if FF_API_CODED_FRAME
1568 FF_DISABLE_DEPRECATION_WARNINGS
1569     av_frame_copy_props(s->avctx->coded_frame, s->current_picture.f);
1570 FF_ENABLE_DEPRECATION_WARNINGS
1571 #endif
1572 #if FF_API_ERROR_FRAME
1573 FF_DISABLE_DEPRECATION_WARNINGS
1574     memcpy(s->current_picture.f->error, s->current_picture.encoding_error,
1575            sizeof(s->current_picture.encoding_error));
1576 FF_ENABLE_DEPRECATION_WARNINGS
1577 #endif
1578 }
1579
1580 static void update_noise_reduction(MpegEncContext *s)
1581 {
1582     int intra, i;
1583
1584     for (intra = 0; intra < 2; intra++) {
1585         if (s->dct_count[intra] > (1 << 16)) {
1586             for (i = 0; i < 64; i++) {
1587                 s->dct_error_sum[intra][i] >>= 1;
1588             }
1589             s->dct_count[intra] >>= 1;
1590         }
1591
1592         for (i = 0; i < 64; i++) {
1593             s->dct_offset[intra][i] = (s->noise_reduction *
1594                                        s->dct_count[intra] +
1595                                        s->dct_error_sum[intra][i] / 2) /
1596                                       (s->dct_error_sum[intra][i] + 1);
1597         }
1598     }
1599 }
1600
1601 static int frame_start(MpegEncContext *s)
1602 {
1603     int ret;
1604
1605     /* mark & release old frames */
1606     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1607         s->last_picture_ptr != s->next_picture_ptr &&
1608         s->last_picture_ptr->f->buf[0]) {
1609         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1610     }
1611
1612     s->current_picture_ptr->f->pict_type = s->pict_type;
1613     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1614
1615     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1616     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1617                                    s->current_picture_ptr)) < 0)
1618         return ret;
1619
1620     if (s->pict_type != AV_PICTURE_TYPE_B) {
1621         s->last_picture_ptr = s->next_picture_ptr;
1622         if (!s->droppable)
1623             s->next_picture_ptr = s->current_picture_ptr;
1624     }
1625
1626     if (s->last_picture_ptr) {
1627         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1628         if (s->last_picture_ptr->f->buf[0] &&
1629             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1630                                        s->last_picture_ptr)) < 0)
1631             return ret;
1632     }
1633     if (s->next_picture_ptr) {
1634         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1635         if (s->next_picture_ptr->f->buf[0] &&
1636             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1637                                        s->next_picture_ptr)) < 0)
1638             return ret;
1639     }
1640
1641     if (s->picture_structure!= PICT_FRAME) {
1642         int i;
1643         for (i = 0; i < 4; i++) {
1644             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1645                 s->current_picture.f->data[i] +=
1646                     s->current_picture.f->linesize[i];
1647             }
1648             s->current_picture.f->linesize[i] *= 2;
1649             s->last_picture.f->linesize[i]    *= 2;
1650             s->next_picture.f->linesize[i]    *= 2;
1651         }
1652     }
1653
1654     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1655         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1656         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1657     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1658         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1659         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1660     } else {
1661         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1662         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1663     }
1664
1665     if (s->dct_error_sum) {
1666         assert(s->noise_reduction && s->encoding);
1667         update_noise_reduction(s);
1668     }
1669
1670     return 0;
1671 }
1672
1673 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1674                           const AVFrame *pic_arg, int *got_packet)
1675 {
1676     MpegEncContext *s = avctx->priv_data;
1677     int i, stuffing_count, ret;
1678     int context_count = s->slice_context_count;
1679
1680     s->picture_in_gop_number++;
1681
1682     if (load_input_picture(s, pic_arg) < 0)
1683         return -1;
1684
1685     if (select_input_picture(s) < 0) {
1686         return -1;
1687     }
1688
1689     /* output? */
1690     if (s->new_picture.f->data[0]) {
1691         uint8_t *sd;
1692         if (!pkt->data &&
1693             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1694             return ret;
1695         if (s->mb_info) {
1696             s->mb_info_ptr = av_packet_new_side_data(pkt,
1697                                  AV_PKT_DATA_H263_MB_INFO,
1698                                  s->mb_width*s->mb_height*12);
1699             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1700         }
1701
1702         for (i = 0; i < context_count; i++) {
1703             int start_y = s->thread_context[i]->start_mb_y;
1704             int   end_y = s->thread_context[i]->  end_mb_y;
1705             int h       = s->mb_height;
1706             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1707             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1708
1709             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1710         }
1711
1712         s->pict_type = s->new_picture.f->pict_type;
1713         //emms_c();
1714         ret = frame_start(s);
1715         if (ret < 0)
1716             return ret;
1717 vbv_retry:
1718         if (encode_picture(s, s->picture_number) < 0)
1719             return -1;
1720
1721 #if FF_API_STAT_BITS
1722 FF_DISABLE_DEPRECATION_WARNINGS
1723         avctx->header_bits = s->header_bits;
1724         avctx->mv_bits     = s->mv_bits;
1725         avctx->misc_bits   = s->misc_bits;
1726         avctx->i_tex_bits  = s->i_tex_bits;
1727         avctx->p_tex_bits  = s->p_tex_bits;
1728         avctx->i_count     = s->i_count;
1729         // FIXME f/b_count in avctx
1730         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1731         avctx->skip_count  = s->skip_count;
1732 FF_ENABLE_DEPRECATION_WARNINGS
1733 #endif
1734
1735         frame_end(s);
1736
1737         sd = av_packet_new_side_data(pkt, AV_PKT_DATA_QUALITY_FACTOR,
1738                                      sizeof(int));
1739         if (!sd)
1740             return AVERROR(ENOMEM);
1741         *(int *)sd = s->current_picture.f->quality;
1742
1743         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1744             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1745
1746         if (avctx->rc_buffer_size) {
1747             RateControlContext *rcc = &s->rc_context;
1748             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1749
1750             if (put_bits_count(&s->pb) > max_size &&
1751                 s->lambda < s->lmax) {
1752                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1753                                        (s->qscale + 1) / s->qscale);
1754                 if (s->adaptive_quant) {
1755                     int i;
1756                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1757                         s->lambda_table[i] =
1758                             FFMAX(s->lambda_table[i] + 1,
1759                                   s->lambda_table[i] * (s->qscale + 1) /
1760                                   s->qscale);
1761                 }
1762                 s->mb_skipped = 0;        // done in frame_start()
1763                 // done in encode_picture() so we must undo it
1764                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1765                     if (s->flipflop_rounding          ||
1766                         s->codec_id == AV_CODEC_ID_H263P ||
1767                         s->codec_id == AV_CODEC_ID_MPEG4)
1768                         s->no_rounding ^= 1;
1769                 }
1770                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1771                     s->time_base       = s->last_time_base;
1772                     s->last_non_b_time = s->time - s->pp_time;
1773                 }
1774                 for (i = 0; i < context_count; i++) {
1775                     PutBitContext *pb = &s->thread_context[i]->pb;
1776                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1777                 }
1778                 goto vbv_retry;
1779             }
1780
1781             assert(s->avctx->rc_max_rate);
1782         }
1783
1784         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1785             ff_write_pass1_stats(s);
1786
1787         for (i = 0; i < 4; i++) {
1788             s->current_picture_ptr->encoding_error[i] = s->current_picture.encoding_error[i];
1789             avctx->error[i] += s->current_picture_ptr->encoding_error[i];
1790         }
1791
1792         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1793             assert(put_bits_count(&s->pb) == s->header_bits + s->mv_bits +
1794                                              s->misc_bits + s->i_tex_bits +
1795                                              s->p_tex_bits);
1796         flush_put_bits(&s->pb);
1797         s->frame_bits  = put_bits_count(&s->pb);
1798
1799         stuffing_count = ff_vbv_update(s, s->frame_bits);
1800         if (stuffing_count) {
1801             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1802                     stuffing_count + 50) {
1803                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1804                 return -1;
1805             }
1806
1807             switch (s->codec_id) {
1808             case AV_CODEC_ID_MPEG1VIDEO:
1809             case AV_CODEC_ID_MPEG2VIDEO:
1810                 while (stuffing_count--) {
1811                     put_bits(&s->pb, 8, 0);
1812                 }
1813             break;
1814             case AV_CODEC_ID_MPEG4:
1815                 put_bits(&s->pb, 16, 0);
1816                 put_bits(&s->pb, 16, 0x1C3);
1817                 stuffing_count -= 4;
1818                 while (stuffing_count--) {
1819                     put_bits(&s->pb, 8, 0xFF);
1820                 }
1821             break;
1822             default:
1823                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1824             }
1825             flush_put_bits(&s->pb);
1826             s->frame_bits  = put_bits_count(&s->pb);
1827         }
1828
1829         /* update mpeg1/2 vbv_delay for CBR */
1830         if (s->avctx->rc_max_rate                          &&
1831             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1832             s->out_format == FMT_MPEG1                     &&
1833             90000LL * (avctx->rc_buffer_size - 1) <=
1834                 s->avctx->rc_max_rate * 0xFFFFLL) {
1835             AVCPBProperties *props;
1836             size_t props_size;
1837
1838             int vbv_delay, min_delay;
1839             double inbits  = s->avctx->rc_max_rate *
1840                              av_q2d(s->avctx->time_base);
1841             int    minbits = s->frame_bits - 8 *
1842                              (s->vbv_delay_ptr - s->pb.buf - 1);
1843             double bits    = s->rc_context.buffer_index + minbits - inbits;
1844
1845             if (bits < 0)
1846                 av_log(s->avctx, AV_LOG_ERROR,
1847                        "Internal error, negative bits\n");
1848
1849             assert(s->repeat_first_field == 0);
1850
1851             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1852             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1853                         s->avctx->rc_max_rate;
1854
1855             vbv_delay = FFMAX(vbv_delay, min_delay);
1856
1857             assert(vbv_delay < 0xFFFF);
1858
1859             s->vbv_delay_ptr[0] &= 0xF8;
1860             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1861             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1862             s->vbv_delay_ptr[2] &= 0x07;
1863             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1864
1865             props = av_cpb_properties_alloc(&props_size);
1866             if (!props)
1867                 return AVERROR(ENOMEM);
1868             props->vbv_delay = vbv_delay * 300;
1869
1870             ret = av_packet_add_side_data(pkt, AV_PKT_DATA_CPB_PROPERTIES,
1871                                           (uint8_t*)props, props_size);
1872             if (ret < 0) {
1873                 av_freep(&props);
1874                 return ret;
1875             }
1876
1877 #if FF_API_VBV_DELAY
1878 FF_DISABLE_DEPRECATION_WARNINGS
1879             avctx->vbv_delay     = vbv_delay * 300;
1880 FF_ENABLE_DEPRECATION_WARNINGS
1881 #endif
1882         }
1883         s->total_bits     += s->frame_bits;
1884 #if FF_API_STAT_BITS
1885 FF_DISABLE_DEPRECATION_WARNINGS
1886         avctx->frame_bits  = s->frame_bits;
1887 FF_ENABLE_DEPRECATION_WARNINGS
1888 #endif
1889
1890
1891         pkt->pts = s->current_picture.f->pts;
1892         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1893             if (!s->current_picture.f->coded_picture_number)
1894                 pkt->dts = pkt->pts - s->dts_delta;
1895             else
1896                 pkt->dts = s->reordered_pts;
1897             s->reordered_pts = pkt->pts;
1898         } else
1899             pkt->dts = pkt->pts;
1900         if (s->current_picture.f->key_frame)
1901             pkt->flags |= AV_PKT_FLAG_KEY;
1902         if (s->mb_info)
1903             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1904     } else {
1905         s->frame_bits = 0;
1906     }
1907     assert((s->frame_bits & 7) == 0);
1908
1909     pkt->size = s->frame_bits / 8;
1910     *got_packet = !!pkt->size;
1911     return 0;
1912 }
1913
1914 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1915                                                 int n, int threshold)
1916 {
1917     static const char tab[64] = {
1918         3, 2, 2, 1, 1, 1, 1, 1,
1919         1, 1, 1, 1, 1, 1, 1, 1,
1920         1, 1, 1, 1, 1, 1, 1, 1,
1921         0, 0, 0, 0, 0, 0, 0, 0,
1922         0, 0, 0, 0, 0, 0, 0, 0,
1923         0, 0, 0, 0, 0, 0, 0, 0,
1924         0, 0, 0, 0, 0, 0, 0, 0,
1925         0, 0, 0, 0, 0, 0, 0, 0
1926     };
1927     int score = 0;
1928     int run = 0;
1929     int i;
1930     int16_t *block = s->block[n];
1931     const int last_index = s->block_last_index[n];
1932     int skip_dc;
1933
1934     if (threshold < 0) {
1935         skip_dc = 0;
1936         threshold = -threshold;
1937     } else
1938         skip_dc = 1;
1939
1940     /* Are all we could set to zero already zero? */
1941     if (last_index <= skip_dc - 1)
1942         return;
1943
1944     for (i = 0; i <= last_index; i++) {
1945         const int j = s->intra_scantable.permutated[i];
1946         const int level = FFABS(block[j]);
1947         if (level == 1) {
1948             if (skip_dc && i == 0)
1949                 continue;
1950             score += tab[run];
1951             run = 0;
1952         } else if (level > 1) {
1953             return;
1954         } else {
1955             run++;
1956         }
1957     }
1958     if (score >= threshold)
1959         return;
1960     for (i = skip_dc; i <= last_index; i++) {
1961         const int j = s->intra_scantable.permutated[i];
1962         block[j] = 0;
1963     }
1964     if (block[0])
1965         s->block_last_index[n] = 0;
1966     else
1967         s->block_last_index[n] = -1;
1968 }
1969
1970 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1971                                int last_index)
1972 {
1973     int i;
1974     const int maxlevel = s->max_qcoeff;
1975     const int minlevel = s->min_qcoeff;
1976     int overflow = 0;
1977
1978     if (s->mb_intra) {
1979         i = 1; // skip clipping of intra dc
1980     } else
1981         i = 0;
1982
1983     for (; i <= last_index; i++) {
1984         const int j = s->intra_scantable.permutated[i];
1985         int level = block[j];
1986
1987         if (level > maxlevel) {
1988             level = maxlevel;
1989             overflow++;
1990         } else if (level < minlevel) {
1991             level = minlevel;
1992             overflow++;
1993         }
1994
1995         block[j] = level;
1996     }
1997
1998     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1999         av_log(s->avctx, AV_LOG_INFO,
2000                "warning, clipping %d dct coefficients to %d..%d\n",
2001                overflow, minlevel, maxlevel);
2002 }
2003
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For each pixel the (up to) 3x3 neighbourhood, clipped to the block, is
 * examined and the weight is 36 * sqrt(count * sum(v^2) - sum(v)^2) / count,
 * where count is the number of neighbourhood samples.
 *
 * @param weight output, 64 entries stored row by row
 * @param ptr    top-left pixel of the block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;
    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);   /* exclusive */

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2); /* exclusive */
            const int count = (bottom - top) * (right - left);
            int sum = 0;
            int sqr = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int v = ptr[nx + ny * stride];
                    sum += v;
                    sqr += v * v;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
2027
2028 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2029                                                 int motion_x, int motion_y,
2030                                                 int mb_block_height,
2031                                                 int mb_block_count)
2032 {
2033     int16_t weight[8][64];
2034     int16_t orig[8][64];
2035     const int mb_x = s->mb_x;
2036     const int mb_y = s->mb_y;
2037     int i;
2038     int skip_dct[8];
2039     int dct_offset = s->linesize * 8; // default for progressive frames
2040     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2041     ptrdiff_t wrap_y, wrap_c;
2042
2043     for (i = 0; i < mb_block_count; i++)
2044         skip_dct[i] = s->skipdct;
2045
2046     if (s->adaptive_quant) {
2047         const int last_qp = s->qscale;
2048         const int mb_xy = mb_x + mb_y * s->mb_stride;
2049
2050         s->lambda = s->lambda_table[mb_xy];
2051         update_qscale(s);
2052
2053         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2054             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2055             s->dquant = s->qscale - last_qp;
2056
2057             if (s->out_format == FMT_H263) {
2058                 s->dquant = av_clip(s->dquant, -2, 2);
2059
2060                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2061                     if (!s->mb_intra) {
2062                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2063                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2064                                 s->dquant = 0;
2065                         }
2066                         if (s->mv_type == MV_TYPE_8X8)
2067                             s->dquant = 0;
2068                     }
2069                 }
2070             }
2071         }
2072         ff_set_qscale(s, last_qp + s->dquant);
2073     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2074         ff_set_qscale(s, s->qscale + s->dquant);
2075
2076     wrap_y = s->linesize;
2077     wrap_c = s->uvlinesize;
2078     ptr_y  = s->new_picture.f->data[0] +
2079              (mb_y * 16 * wrap_y)              + mb_x * 16;
2080     ptr_cb = s->new_picture.f->data[1] +
2081              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
2082     ptr_cr = s->new_picture.f->data[2] +
2083              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
2084
2085     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
2086         uint8_t *ebuf = s->sc.edge_emu_buffer + 32;
2087         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2088                                  wrap_y, wrap_y,
2089                                  16, 16, mb_x * 16, mb_y * 16,
2090                                  s->width, s->height);
2091         ptr_y = ebuf;
2092         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
2093                                  wrap_c, wrap_c,
2094                                  8, mb_block_height, mb_x * 8, mb_y * 8,
2095                                  s->width >> 1, s->height >> 1);
2096         ptr_cb = ebuf + 18 * wrap_y;
2097         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
2098                                  wrap_c, wrap_c,
2099                                  8, mb_block_height, mb_x * 8, mb_y * 8,
2100                                  s->width >> 1, s->height >> 1);
2101         ptr_cr = ebuf + 18 * wrap_y + 8;
2102     }
2103
2104     if (s->mb_intra) {
2105         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2106             int progressive_score, interlaced_score;
2107
2108             s->interlaced_dct = 0;
2109             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2110                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2111                                                      NULL, wrap_y, 8) - 400;
2112
2113             if (progressive_score > 0) {
2114                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2115                                                         NULL, wrap_y * 2, 8) +
2116                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2117                                                         NULL, wrap_y * 2, 8);
2118                 if (progressive_score > interlaced_score) {
2119                     s->interlaced_dct = 1;
2120
2121                     dct_offset = wrap_y;
2122                     wrap_y <<= 1;
2123                     if (s->chroma_format == CHROMA_422)
2124                         wrap_c <<= 1;
2125                 }
2126             }
2127         }
2128
2129         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2130         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2131         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2132         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2133
2134         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2135             skip_dct[4] = 1;
2136             skip_dct[5] = 1;
2137         } else {
2138             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2139             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2140             if (!s->chroma_y_shift) { /* 422 */
2141                 s->pdsp.get_pixels(s->block[6],
2142                                    ptr_cb + (dct_offset >> 1), wrap_c);
2143                 s->pdsp.get_pixels(s->block[7],
2144                                    ptr_cr + (dct_offset >> 1), wrap_c);
2145             }
2146         }
2147     } else {
2148         op_pixels_func (*op_pix)[4];
2149         qpel_mc_func (*op_qpix)[16];
2150         uint8_t *dest_y, *dest_cb, *dest_cr;
2151
2152         dest_y  = s->dest[0];
2153         dest_cb = s->dest[1];
2154         dest_cr = s->dest[2];
2155
2156         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2157             op_pix  = s->hdsp.put_pixels_tab;
2158             op_qpix = s->qdsp.put_qpel_pixels_tab;
2159         } else {
2160             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2161             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2162         }
2163
2164         if (s->mv_dir & MV_DIR_FORWARD) {
2165             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2166                           s->last_picture.f->data,
2167                           op_pix, op_qpix);
2168             op_pix  = s->hdsp.avg_pixels_tab;
2169             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2170         }
2171         if (s->mv_dir & MV_DIR_BACKWARD) {
2172             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2173                           s->next_picture.f->data,
2174                           op_pix, op_qpix);
2175         }
2176
2177         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2178             int progressive_score, interlaced_score;
2179
2180             s->interlaced_dct = 0;
2181             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2182                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2183                                                      ptr_y + wrap_y * 8,
2184                                                      wrap_y, 8) - 400;
2185
2186             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2187                 progressive_score -= 400;
2188
2189             if (progressive_score > 0) {
2190                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2191                                                         wrap_y * 2, 8) +
2192                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2193                                                         ptr_y + wrap_y,
2194                                                         wrap_y * 2, 8);
2195
2196                 if (progressive_score > interlaced_score) {
2197                     s->interlaced_dct = 1;
2198
2199                     dct_offset = wrap_y;
2200                     wrap_y <<= 1;
2201                     if (s->chroma_format == CHROMA_422)
2202                         wrap_c <<= 1;
2203                 }
2204             }
2205         }
2206
2207         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2208         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2209         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2210                             dest_y + dct_offset, wrap_y);
2211         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2212                             dest_y + dct_offset + 8, wrap_y);
2213
2214         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2215             skip_dct[4] = 1;
2216             skip_dct[5] = 1;
2217         } else {
2218             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2219             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2220             if (!s->chroma_y_shift) { /* 422 */
2221                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2222                                     dest_cb + (dct_offset >> 1), wrap_c);
2223                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2224                                     dest_cr + (dct_offset >> 1), wrap_c);
2225             }
2226         }
2227         /* pre quantization */
2228         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2229                 2 * s->qscale * s->qscale) {
2230             // FIXME optimize
2231             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2232                 skip_dct[0] = 1;
2233             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2234                 skip_dct[1] = 1;
2235             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2236                                wrap_y, 8) < 20 * s->qscale)
2237                 skip_dct[2] = 1;
2238             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2239                                wrap_y, 8) < 20 * s->qscale)
2240                 skip_dct[3] = 1;
2241             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2242                 skip_dct[4] = 1;
2243             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2244                 skip_dct[5] = 1;
2245             if (!s->chroma_y_shift) { /* 422 */
2246                 if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2247                                    dest_cb + (dct_offset >> 1),
2248                                    wrap_c, 8) < 20 * s->qscale)
2249                     skip_dct[6] = 1;
2250                 if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2251                                    dest_cr + (dct_offset >> 1),
2252                                    wrap_c, 8) < 20 * s->qscale)
2253                     skip_dct[7] = 1;
2254             }
2255         }
2256     }
2257
2258     if (s->quantizer_noise_shaping) {
2259         if (!skip_dct[0])
2260             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2261         if (!skip_dct[1])
2262             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2263         if (!skip_dct[2])
2264             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2265         if (!skip_dct[3])
2266             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2267         if (!skip_dct[4])
2268             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2269         if (!skip_dct[5])
2270             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2271         if (!s->chroma_y_shift) { /* 422 */
2272             if (!skip_dct[6])
2273                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2274                                   wrap_c);
2275             if (!skip_dct[7])
2276                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2277                                   wrap_c);
2278         }
2279         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2280     }
2281
2282     /* DCT & quantize */
2283     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2284     {
2285         for (i = 0; i < mb_block_count; i++) {
2286             if (!skip_dct[i]) {
2287                 int overflow;
2288                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2289                 // FIXME we could decide to change to quantizer instead of
2290                 // clipping
2291                 // JS: I don't think that would be a good idea it could lower
2292                 //     quality instead of improve it. Just INTRADC clipping
2293                 //     deserves changes in quantizer
2294                 if (overflow)
2295                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2296             } else
2297                 s->block_last_index[i] = -1;
2298         }
2299         if (s->quantizer_noise_shaping) {
2300             for (i = 0; i < mb_block_count; i++) {
2301                 if (!skip_dct[i]) {
2302                     s->block_last_index[i] =
2303                         dct_quantize_refine(s, s->block[i], weight[i],
2304                                             orig[i], i, s->qscale);
2305                 }
2306             }
2307         }
2308
2309         if (s->luma_elim_threshold && !s->mb_intra)
2310             for (i = 0; i < 4; i++)
2311                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2312         if (s->chroma_elim_threshold && !s->mb_intra)
2313             for (i = 4; i < mb_block_count; i++)
2314                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2315
2316         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2317             for (i = 0; i < mb_block_count; i++) {
2318                 if (s->block_last_index[i] == -1)
2319                     s->coded_score[i] = INT_MAX / 256;
2320             }
2321         }
2322     }
2323
2324     if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2325         s->block_last_index[4] =
2326         s->block_last_index[5] = 0;
2327         s->block[4][0] =
2328         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2329     }
2330
2331     // non c quantize code returns incorrect block_last_index FIXME
2332     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2333         for (i = 0; i < mb_block_count; i++) {
2334             int j;
2335             if (s->block_last_index[i] > 0) {
2336                 for (j = 63; j > 0; j--) {
2337                     if (s->block[i][s->intra_scantable.permutated[j]])
2338                         break;
2339                 }
2340                 s->block_last_index[i] = j;
2341             }
2342         }
2343     }
2344
2345     /* huffman encode */
2346     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2347     case AV_CODEC_ID_MPEG1VIDEO:
2348     case AV_CODEC_ID_MPEG2VIDEO:
2349         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2350             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2351         break;
2352     case AV_CODEC_ID_MPEG4:
2353         if (CONFIG_MPEG4_ENCODER)
2354             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2355         break;
2356     case AV_CODEC_ID_MSMPEG4V2:
2357     case AV_CODEC_ID_MSMPEG4V3:
2358     case AV_CODEC_ID_WMV1:
2359         if (CONFIG_MSMPEG4_ENCODER)
2360             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2361         break;
2362     case AV_CODEC_ID_WMV2:
2363         if (CONFIG_WMV2_ENCODER)
2364             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2365         break;
2366     case AV_CODEC_ID_H261:
2367         if (CONFIG_H261_ENCODER)
2368             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2369         break;
2370     case AV_CODEC_ID_H263:
2371     case AV_CODEC_ID_H263P:
2372     case AV_CODEC_ID_FLV1:
2373     case AV_CODEC_ID_RV10:
2374     case AV_CODEC_ID_RV20:
2375         if (CONFIG_H263_ENCODER)
2376             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2377         break;
2378     case AV_CODEC_ID_MJPEG:
2379         if (CONFIG_MJPEG_ENCODER)
2380             ff_mjpeg_encode_mb(s, s->block);
2381         break;
2382     default:
2383         assert(0);
2384     }
2385 }
2386
2387 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2388 {
2389     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2390     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2391 }
2392
2393 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2394     int i;
2395
2396     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2397
2398     /* mpeg1 */
2399     d->mb_skip_run= s->mb_skip_run;
2400     for(i=0; i<3; i++)
2401         d->last_dc[i] = s->last_dc[i];
2402
2403     /* statistics */
2404     d->mv_bits= s->mv_bits;
2405     d->i_tex_bits= s->i_tex_bits;
2406     d->p_tex_bits= s->p_tex_bits;
2407     d->i_count= s->i_count;
2408     d->f_count= s->f_count;
2409     d->b_count= s->b_count;
2410     d->skip_count= s->skip_count;
2411     d->misc_bits= s->misc_bits;
2412     d->last_bits= 0;
2413
2414     d->mb_skipped= 0;
2415     d->qscale= s->qscale;
2416     d->dquant= s->dquant;
2417
2418     d->esc3_level_length= s->esc3_level_length;
2419 }
2420
2421 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2422     int i;
2423
2424     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2425     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2426
2427     /* mpeg1 */
2428     d->mb_skip_run= s->mb_skip_run;
2429     for(i=0; i<3; i++)
2430         d->last_dc[i] = s->last_dc[i];
2431
2432     /* statistics */
2433     d->mv_bits= s->mv_bits;
2434     d->i_tex_bits= s->i_tex_bits;
2435     d->p_tex_bits= s->p_tex_bits;
2436     d->i_count= s->i_count;
2437     d->f_count= s->f_count;
2438     d->b_count= s->b_count;
2439     d->skip_count= s->skip_count;
2440     d->misc_bits= s->misc_bits;
2441
2442     d->mb_intra= s->mb_intra;
2443     d->mb_skipped= s->mb_skipped;
2444     d->mv_type= s->mv_type;
2445     d->mv_dir= s->mv_dir;
2446     d->pb= s->pb;
2447     if(s->data_partitioning){
2448         d->pb2= s->pb2;
2449         d->tex_pb= s->tex_pb;
2450     }
2451     d->block= s->block;
2452     for(i=0; i<8; i++)
2453         d->block_last_index[i]= s->block_last_index[i];
2454     d->interlaced_dct= s->interlaced_dct;
2455     d->qscale= s->qscale;
2456
2457     d->esc3_level_length= s->esc3_level_length;
2458 }
2459
2460 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2461                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2462                            int *dmin, int *next_block, int motion_x, int motion_y)
2463 {
2464     int score;
2465     uint8_t *dest_backup[3];
2466
2467     copy_context_before_encode(s, backup, type);
2468
2469     s->block= s->blocks[*next_block];
2470     s->pb= pb[*next_block];
2471     if(s->data_partitioning){
2472         s->pb2   = pb2   [*next_block];
2473         s->tex_pb= tex_pb[*next_block];
2474     }
2475
2476     if(*next_block){
2477         memcpy(dest_backup, s->dest, sizeof(s->dest));
2478         s->dest[0] = s->sc.rd_scratchpad;
2479         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2480         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2481         assert(s->linesize >= 32); //FIXME
2482     }
2483
2484     encode_mb(s, motion_x, motion_y);
2485
2486     score= put_bits_count(&s->pb);
2487     if(s->data_partitioning){
2488         score+= put_bits_count(&s->pb2);
2489         score+= put_bits_count(&s->tex_pb);
2490     }
2491
2492     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2493         ff_mpv_decode_mb(s, s->block);
2494
2495         score *= s->lambda2;
2496         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2497     }
2498
2499     if(*next_block){
2500         memcpy(s->dest, dest_backup, sizeof(s->dest));
2501     }
2502
2503     if(score<*dmin){
2504         *dmin= score;
2505         *next_block^=1;
2506
2507         copy_context_after_encode(best, s, type);
2508     }
2509 }
2510
2511 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2512     uint32_t *sq = ff_square_tab + 256;
2513     int acc=0;
2514     int x,y;
2515
2516     if(w==16 && h==16)
2517         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2518     else if(w==8 && h==8)
2519         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2520
2521     for(y=0; y<h; y++){
2522         for(x=0; x<w; x++){
2523             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2524         }
2525     }
2526
2527     assert(acc>=0);
2528
2529     return acc;
2530 }
2531
2532 static int sse_mb(MpegEncContext *s){
2533     int w= 16;
2534     int h= 16;
2535
2536     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2537     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2538
2539     if(w==16 && h==16)
2540       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2541         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2542                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2543                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2544       }else{
2545         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2546                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2547                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2548       }
2549     else
2550         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2551                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2552                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2553 }
2554
2555 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2556     MpegEncContext *s= *(void**)arg;
2557
2558
2559     s->me.pre_pass=1;
2560     s->me.dia_size= s->avctx->pre_dia_size;
2561     s->first_slice_line=1;
2562     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2563         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2564             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2565         }
2566         s->first_slice_line=0;
2567     }
2568
2569     s->me.pre_pass=0;
2570
2571     return 0;
2572 }
2573
2574 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2575     MpegEncContext *s= *(void**)arg;
2576
2577     s->me.dia_size= s->avctx->dia_size;
2578     s->first_slice_line=1;
2579     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2580         s->mb_x=0; //for block init below
2581         ff_init_block_index(s);
2582         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2583             s->block_index[0]+=2;
2584             s->block_index[1]+=2;
2585             s->block_index[2]+=2;
2586             s->block_index[3]+=2;
2587
2588             /* compute motion vector & mb_type and store in context */
2589             if(s->pict_type==AV_PICTURE_TYPE_B)
2590                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2591             else
2592                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2593         }
2594         s->first_slice_line=0;
2595     }
2596     return 0;
2597 }
2598
2599 static int mb_var_thread(AVCodecContext *c, void *arg){
2600     MpegEncContext *s= *(void**)arg;
2601     int mb_x, mb_y;
2602
2603     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2604         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2605             int xx = mb_x * 16;
2606             int yy = mb_y * 16;
2607             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2608             int varc;
2609             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2610
2611             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2612                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2613
2614             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2615             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2616             s->me.mb_var_sum_temp    += varc;
2617         }
2618     }
2619     return 0;
2620 }
2621
2622 static void write_slice_end(MpegEncContext *s){
2623     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2624         if(s->partitioned_frame){
2625             ff_mpeg4_merge_partitions(s);
2626         }
2627
2628         ff_mpeg4_stuffing(&s->pb);
2629     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2630         ff_mjpeg_encode_stuffing(&s->pb);
2631     }
2632
2633     avpriv_align_put_bits(&s->pb);
2634     flush_put_bits(&s->pb);
2635
2636     if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2637         s->misc_bits+= get_bits_diff(s);
2638 }
2639
2640 static void write_mb_info(MpegEncContext *s)
2641 {
2642     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2643     int offset = put_bits_count(&s->pb);
2644     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2645     int gobn = s->mb_y / s->gob_index;
2646     int pred_x, pred_y;
2647     if (CONFIG_H263_ENCODER)
2648         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2649     bytestream_put_le32(&ptr, offset);
2650     bytestream_put_byte(&ptr, s->qscale);
2651     bytestream_put_byte(&ptr, gobn);
2652     bytestream_put_le16(&ptr, mba);
2653     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2654     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2655     /* 4MV not implemented */
2656     bytestream_put_byte(&ptr, 0); /* hmv2 */
2657     bytestream_put_byte(&ptr, 0); /* vmv2 */
2658 }
2659
2660 static void update_mb_info(MpegEncContext *s, int startcode)
2661 {
2662     if (!s->mb_info)
2663         return;
2664     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2665         s->mb_info_size += 12;
2666         s->prev_mb_info = s->last_mb_info;
2667     }
2668     if (startcode) {
2669         s->prev_mb_info = put_bits_count(&s->pb)/8;
2670         /* This might have incremented mb_info_size above, and we return without
2671          * actually writing any info into that slot yet. But in that case,
2672          * this will be called again at the start of the after writing the
2673          * start code, actually writing the mb info. */
2674         return;
2675     }
2676
2677     s->last_mb_info = put_bits_count(&s->pb)/8;
2678     if (!s->mb_info_size)
2679         s->mb_info_size += 12;
2680     write_mb_info(s);
2681 }
2682
2683 static int encode_thread(AVCodecContext *c, void *arg){
2684     MpegEncContext *s= *(void**)arg;
2685     int mb_x, mb_y, pdif = 0;
2686     int chr_h= 16>>s->chroma_y_shift;
2687     int i, j;
2688     MpegEncContext best_s = { 0 }, backup_s;
2689     uint8_t bit_buf[2][MAX_MB_BYTES];
2690     uint8_t bit_buf2[2][MAX_MB_BYTES];
2691     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2692     PutBitContext pb[2], pb2[2], tex_pb[2];
2693
2694     for(i=0; i<2; i++){
2695         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2696         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2697         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2698     }
2699
2700     s->last_bits= put_bits_count(&s->pb);
2701     s->mv_bits=0;
2702     s->misc_bits=0;
2703     s->i_tex_bits=0;
2704     s->p_tex_bits=0;
2705     s->i_count=0;
2706     s->f_count=0;
2707     s->b_count=0;
2708     s->skip_count=0;
2709
2710     for(i=0; i<3; i++){
2711         /* init last dc values */
2712         /* note: quant matrix value (8) is implied here */
2713         s->last_dc[i] = 128 << s->intra_dc_precision;
2714
2715         s->current_picture.encoding_error[i] = 0;
2716     }
2717     s->mb_skip_run = 0;
2718     memset(s->last_mv, 0, sizeof(s->last_mv));
2719
2720     s->last_mv_dir = 0;
2721
2722     switch(s->codec_id){
2723     case AV_CODEC_ID_H263:
2724     case AV_CODEC_ID_H263P:
2725     case AV_CODEC_ID_FLV1:
2726         if (CONFIG_H263_ENCODER)
2727             s->gob_index = H263_GOB_HEIGHT(s->height);
2728         break;
2729     case AV_CODEC_ID_MPEG4:
2730         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2731             ff_mpeg4_init_partitions(s);
2732         break;
2733     }
2734
2735     s->resync_mb_x=0;
2736     s->resync_mb_y=0;
2737     s->first_slice_line = 1;
2738     s->ptr_lastgob = s->pb.buf;
2739     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2740         s->mb_x=0;
2741         s->mb_y= mb_y;
2742
2743         ff_set_qscale(s, s->qscale);
2744         ff_init_block_index(s);
2745
2746         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2747             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2748             int mb_type= s->mb_type[xy];
2749 //            int d;
2750             int dmin= INT_MAX;
2751             int dir;
2752
2753             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2754                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2755                 return -1;
2756             }
2757             if(s->data_partitioning){
2758                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2759                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2760                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2761                     return -1;
2762                 }
2763             }
2764
2765             s->mb_x = mb_x;
2766             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2767             ff_update_block_index(s);
2768
2769             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2770                 ff_h261_reorder_mb_index(s);
2771                 xy= s->mb_y*s->mb_stride + s->mb_x;
2772                 mb_type= s->mb_type[xy];
2773             }
2774
2775             /* write gob / video packet header  */
2776             if(s->rtp_mode){
2777                 int current_packet_size, is_gob_start;
2778
2779                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2780
2781                 is_gob_start = s->rtp_payload_size &&
2782                                current_packet_size >= s->rtp_payload_size &&
2783                                mb_y + mb_x > 0;
2784
2785                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2786
2787                 switch(s->codec_id){
2788                 case AV_CODEC_ID_H263:
2789                 case AV_CODEC_ID_H263P:
2790                     if(!s->h263_slice_structured)
2791                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2792                     break;
2793                 case AV_CODEC_ID_MPEG2VIDEO:
2794                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2795                 case AV_CODEC_ID_MPEG1VIDEO:
2796                     if(s->mb_skip_run) is_gob_start=0;
2797                     break;
2798                 }
2799
2800                 if(is_gob_start){
2801                     if(s->start_mb_y != mb_y || mb_x!=0){
2802                         write_slice_end(s);
2803
2804                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2805                             ff_mpeg4_init_partitions(s);
2806                         }
2807                     }
2808
2809                     assert((put_bits_count(&s->pb)&7) == 0);
2810                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2811
2812                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2813                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2814                         int d = 100 / s->error_rate;
2815                         if(r % d == 0){
2816                             current_packet_size=0;
2817                             s->pb.buf_ptr= s->ptr_lastgob;
2818                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2819                         }
2820                     }
2821
2822 #if FF_API_RTP_CALLBACK
2823 FF_DISABLE_DEPRECATION_WARNINGS
2824                     if (s->avctx->rtp_callback){
2825                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2826                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2827                     }
2828 FF_ENABLE_DEPRECATION_WARNINGS
2829 #endif
2830                     update_mb_info(s, 1);
2831
2832                     switch(s->codec_id){
2833                     case AV_CODEC_ID_MPEG4:
2834                         if (CONFIG_MPEG4_ENCODER) {
2835                             ff_mpeg4_encode_video_packet_header(s);
2836                             ff_mpeg4_clean_buffers(s);
2837                         }
2838                     break;
2839                     case AV_CODEC_ID_MPEG1VIDEO:
2840                     case AV_CODEC_ID_MPEG2VIDEO:
2841                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2842                             ff_mpeg1_encode_slice_header(s);
2843                             ff_mpeg1_clean_buffers(s);
2844                         }
2845                     break;
2846                     case AV_CODEC_ID_H263:
2847                     case AV_CODEC_ID_H263P:
2848                         if (CONFIG_H263_ENCODER)
2849                             ff_h263_encode_gob_header(s, mb_y);
2850                     break;
2851                     }
2852
2853                     if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
2854                         int bits= put_bits_count(&s->pb);
2855                         s->misc_bits+= bits - s->last_bits;
2856                         s->last_bits= bits;
2857                     }
2858
2859                     s->ptr_lastgob += current_packet_size;
2860                     s->first_slice_line=1;
2861                     s->resync_mb_x=mb_x;
2862                     s->resync_mb_y=mb_y;
2863                 }
2864             }
2865
2866             if(  (s->resync_mb_x   == s->mb_x)
2867                && s->resync_mb_y+1 == s->mb_y){
2868                 s->first_slice_line=0;
2869             }
2870
2871             s->mb_skipped=0;
2872             s->dquant=0; //only for QP_RD
2873
2874             update_mb_info(s, 0);
2875
2876             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2877                 int next_block=0;
2878                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2879
2880                 copy_context_before_encode(&backup_s, s, -1);
2881                 backup_s.pb= s->pb;
2882                 best_s.data_partitioning= s->data_partitioning;
2883                 best_s.partitioned_frame= s->partitioned_frame;
2884                 if(s->data_partitioning){
2885                     backup_s.pb2= s->pb2;
2886                     backup_s.tex_pb= s->tex_pb;
2887                 }
2888
2889                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2890                     s->mv_dir = MV_DIR_FORWARD;
2891                     s->mv_type = MV_TYPE_16X16;
2892                     s->mb_intra= 0;
2893                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2894                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2895                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2896                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2897                 }
2898                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2899                     s->mv_dir = MV_DIR_FORWARD;
2900                     s->mv_type = MV_TYPE_FIELD;
2901                     s->mb_intra= 0;
2902                     for(i=0; i<2; i++){
2903                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2904                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2905                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2906                     }
2907                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2908                                  &dmin, &next_block, 0, 0);
2909                 }
2910                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2911                     s->mv_dir = MV_DIR_FORWARD;
2912                     s->mv_type = MV_TYPE_16X16;
2913                     s->mb_intra= 0;
2914                     s->mv[0][0][0] = 0;
2915                     s->mv[0][0][1] = 0;
2916                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2917                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2918                 }
2919                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2920                     s->mv_dir = MV_DIR_FORWARD;
2921                     s->mv_type = MV_TYPE_8X8;
2922                     s->mb_intra= 0;
2923                     for(i=0; i<4; i++){
2924                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2925                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2926                     }
2927                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2928                                  &dmin, &next_block, 0, 0);
2929                 }
2930                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2931                     s->mv_dir = MV_DIR_FORWARD;
2932                     s->mv_type = MV_TYPE_16X16;
2933                     s->mb_intra= 0;
2934                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2935                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2936                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2937                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2938                 }
2939                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2940                     s->mv_dir = MV_DIR_BACKWARD;
2941                     s->mv_type = MV_TYPE_16X16;
2942                     s->mb_intra= 0;
2943                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2944                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2945                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2946                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2947                 }
2948                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2949                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2950                     s->mv_type = MV_TYPE_16X16;
2951                     s->mb_intra= 0;
2952                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2953                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2954                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2955                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2956                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2957                                  &dmin, &next_block, 0, 0);
2958                 }
2959                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2960                     s->mv_dir = MV_DIR_FORWARD;
2961                     s->mv_type = MV_TYPE_FIELD;
2962                     s->mb_intra= 0;
2963                     for(i=0; i<2; i++){
2964                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2965                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2966                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2967                     }
2968                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2969                                  &dmin, &next_block, 0, 0);
2970                 }
2971                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2972                     s->mv_dir = MV_DIR_BACKWARD;
2973                     s->mv_type = MV_TYPE_FIELD;
2974                     s->mb_intra= 0;
2975                     for(i=0; i<2; i++){
2976                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2977                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2978                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2979                     }
2980                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2981                                  &dmin, &next_block, 0, 0);
2982                 }
2983                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2984                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2985                     s->mv_type = MV_TYPE_FIELD;
2986                     s->mb_intra= 0;
2987                     for(dir=0; dir<2; dir++){
2988                         for(i=0; i<2; i++){
2989                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2990                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2991                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2992                         }
2993                     }
2994                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2995                                  &dmin, &next_block, 0, 0);
2996                 }
2997                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2998                     s->mv_dir = 0;
2999                     s->mv_type = MV_TYPE_16X16;
3000                     s->mb_intra= 1;
3001                     s->mv[0][0][0] = 0;
3002                     s->mv[0][0][1] = 0;
3003                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3004                                  &dmin, &next_block, 0, 0);
3005                     if(s->h263_pred || s->h263_aic){
3006                         if(best_s.mb_intra)
3007                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3008                         else
3009                             ff_clean_intra_table_entries(s); //old mode?
3010                     }
3011                 }
3012
3013                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3014                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3015                         const int last_qp= backup_s.qscale;
3016                         int qpi, qp, dc[6];
3017                         int16_t ac[6][16];
3018                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3019                         static const int dquant_tab[4]={-1,1,-2,2};
3020
3021                         assert(backup_s.dquant == 0);
3022
3023                         //FIXME intra
3024                         s->mv_dir= best_s.mv_dir;
3025                         s->mv_type = MV_TYPE_16X16;
3026                         s->mb_intra= best_s.mb_intra;
3027                         s->mv[0][0][0] = best_s.mv[0][0][0];
3028                         s->mv[0][0][1] = best_s.mv[0][0][1];
3029                         s->mv[1][0][0] = best_s.mv[1][0][0];
3030                         s->mv[1][0][1] = best_s.mv[1][0][1];
3031
3032                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3033                         for(; qpi<4; qpi++){
3034                             int dquant= dquant_tab[qpi];
3035                             qp= last_qp + dquant;
3036                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3037                                 continue;
3038                             backup_s.dquant= dquant;
3039                             if(s->mb_intra && s->dc_val[0]){
3040                                 for(i=0; i<6; i++){
3041                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3042                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3043                                 }
3044                             }
3045
3046                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3047                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3048                             if(best_s.qscale != qp){
3049                                 if(s->mb_intra && s->dc_val[0]){
3050                                     for(i=0; i<6; i++){
3051                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3052                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3053                                     }
3054                                 }
3055                             }
3056                         }
3057                     }
3058                 }
3059                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3060                     int mx= s->b_direct_mv_table[xy][0];
3061                     int my= s->b_direct_mv_table[xy][1];
3062
3063                     backup_s.dquant = 0;
3064                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3065                     s->mb_intra= 0;
3066                     ff_mpeg4_set_direct_mv(s, mx, my);
3067                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3068                                  &dmin, &next_block, mx, my);
3069                 }
3070                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3071                     backup_s.dquant = 0;
3072                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3073                     s->mb_intra= 0;
3074                     ff_mpeg4_set_direct_mv(s, 0, 0);
3075                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3076                                  &dmin, &next_block, 0, 0);
3077                 }
3078                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3079                     int coded=0;
3080                     for(i=0; i<6; i++)
3081                         coded |= s->block_last_index[i];
3082                     if(coded){
3083                         int mx,my;
3084                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3085                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3086                             mx=my=0; //FIXME find the one we actually used
3087                             ff_mpeg4_set_direct_mv(s, mx, my);
3088                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3089                             mx= s->mv[1][0][0];
3090                             my= s->mv[1][0][1];
3091                         }else{
3092                             mx= s->mv[0][0][0];
3093                             my= s->mv[0][0][1];
3094                         }
3095
3096                         s->mv_dir= best_s.mv_dir;
3097                         s->mv_type = best_s.mv_type;
3098                         s->mb_intra= 0;
3099 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3100                         s->mv[0][0][1] = best_s.mv[0][0][1];
3101                         s->mv[1][0][0] = best_s.mv[1][0][0];
3102                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3103                         backup_s.dquant= 0;
3104                         s->skipdct=1;
3105                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3106                                         &dmin, &next_block, mx, my);
3107                         s->skipdct=0;
3108                     }
3109                 }
3110
3111                 s->current_picture.qscale_table[xy] = best_s.qscale;
3112
3113                 copy_context_after_encode(s, &best_s, -1);
3114
3115                 pb_bits_count= put_bits_count(&s->pb);
3116                 flush_put_bits(&s->pb);
3117                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3118                 s->pb= backup_s.pb;
3119
3120                 if(s->data_partitioning){
3121                     pb2_bits_count= put_bits_count(&s->pb2);
3122                     flush_put_bits(&s->pb2);
3123                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3124                     s->pb2= backup_s.pb2;
3125
3126                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3127                     flush_put_bits(&s->tex_pb);
3128                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3129                     s->tex_pb= backup_s.tex_pb;
3130                 }
3131                 s->last_bits= put_bits_count(&s->pb);
3132
3133                 if (CONFIG_H263_ENCODER &&
3134                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3135                     ff_h263_update_motion_val(s);
3136
3137                 if(next_block==0){ //FIXME 16 vs linesize16
3138                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3139                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3140                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3141                 }
3142
3143                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3144                     ff_mpv_decode_mb(s, s->block);
3145             } else {
3146                 int motion_x = 0, motion_y = 0;
3147                 s->mv_type=MV_TYPE_16X16;
3148                 // only one MB-Type possible
3149
3150                 switch(mb_type){
3151                 case CANDIDATE_MB_TYPE_INTRA:
3152                     s->mv_dir = 0;
3153                     s->mb_intra= 1;
3154                     motion_x= s->mv[0][0][0] = 0;
3155                     motion_y= s->mv[0][0][1] = 0;
3156                     break;
3157                 case CANDIDATE_MB_TYPE_INTER:
3158                     s->mv_dir = MV_DIR_FORWARD;
3159                     s->mb_intra= 0;
3160                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3161                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3162                     break;
3163                 case CANDIDATE_MB_TYPE_INTER_I:
3164                     s->mv_dir = MV_DIR_FORWARD;
3165                     s->mv_type = MV_TYPE_FIELD;
3166                     s->mb_intra= 0;
3167                     for(i=0; i<2; i++){
3168                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3169                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3170                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3171                     }
3172                     break;
3173                 case CANDIDATE_MB_TYPE_INTER4V:
3174                     s->mv_dir = MV_DIR_FORWARD;
3175                     s->mv_type = MV_TYPE_8X8;
3176                     s->mb_intra= 0;
3177                     for(i=0; i<4; i++){
3178                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3179                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3180                     }
3181                     break;
3182                 case CANDIDATE_MB_TYPE_DIRECT:
3183                     if (CONFIG_MPEG4_ENCODER) {
3184                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3185                         s->mb_intra= 0;
3186                         motion_x=s->b_direct_mv_table[xy][0];
3187                         motion_y=s->b_direct_mv_table[xy][1];
3188                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3189                     }
3190                     break;
3191                 case CANDIDATE_MB_TYPE_DIRECT0:
3192                     if (CONFIG_MPEG4_ENCODER) {
3193                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3194                         s->mb_intra= 0;
3195                         ff_mpeg4_set_direct_mv(s, 0, 0);
3196                     }
3197                     break;
3198                 case CANDIDATE_MB_TYPE_BIDIR:
3199                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3200                     s->mb_intra= 0;
3201                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3202                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3203                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3204                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3205                     break;
3206                 case CANDIDATE_MB_TYPE_BACKWARD:
3207                     s->mv_dir = MV_DIR_BACKWARD;
3208                     s->mb_intra= 0;
3209                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3210                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3211                     break;
3212                 case CANDIDATE_MB_TYPE_FORWARD:
3213                     s->mv_dir = MV_DIR_FORWARD;
3214                     s->mb_intra= 0;
3215                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3216                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3217                     break;
3218                 case CANDIDATE_MB_TYPE_FORWARD_I:
3219                     s->mv_dir = MV_DIR_FORWARD;
3220                     s->mv_type = MV_TYPE_FIELD;
3221                     s->mb_intra= 0;
3222                     for(i=0; i<2; i++){
3223                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3224                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3225                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3226                     }
3227                     break;
3228                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3229                     s->mv_dir = MV_DIR_BACKWARD;
3230                     s->mv_type = MV_TYPE_FIELD;
3231                     s->mb_intra= 0;
3232                     for(i=0; i<2; i++){
3233                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3234                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3235                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3236                     }
3237                     break;
3238                 case CANDIDATE_MB_TYPE_BIDIR_I:
3239                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3240                     s->mv_type = MV_TYPE_FIELD;
3241                     s->mb_intra= 0;
3242                     for(dir=0; dir<2; dir++){
3243                         for(i=0; i<2; i++){
3244                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3245                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3246                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3247                         }
3248                     }
3249                     break;
3250                 default:
3251                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3252                 }
3253
3254                 encode_mb(s, motion_x, motion_y);
3255
3256                 // RAL: Update last macroblock type
3257                 s->last_mv_dir = s->mv_dir;
3258
3259                 if (CONFIG_H263_ENCODER &&
3260                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3261                     ff_h263_update_motion_val(s);
3262
3263                 ff_mpv_decode_mb(s, s->block);
3264             }
3265
3266             /* clean the MV table in IPS frames for direct mode in B frames */
3267             if(s->mb_intra /* && I,P,S_TYPE */){
3268                 s->p_mv_table[xy][0]=0;
3269                 s->p_mv_table[xy][1]=0;
3270             }
3271
3272             if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3273                 int w= 16;
3274                 int h= 16;
3275
3276                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3277                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3278
3279                 s->current_picture.encoding_error[0] += sse(
3280                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3281                     s->dest[0], w, h, s->linesize);
3282                 s->current_picture.encoding_error[1] += sse(
3283                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3284                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3285                 s->current_picture.encoding_error[2] += sse(
3286                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3287                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3288             }
3289             if(s->loop_filter){
3290                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3291                     ff_h263_loop_filter(s);
3292             }
3293             ff_dlog(s->avctx, "MB %d %d bits\n",
3294                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3295         }
3296     }
3297
3298     //not beautiful here but we must write it before flushing so it has to be here
3299     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3300         ff_msmpeg4_encode_ext_header(s);
3301
3302     write_slice_end(s);
3303
3304 #if FF_API_RTP_CALLBACK
3305 FF_DISABLE_DEPRECATION_WARNINGS
3306     /* Send the last GOB if RTP */
3307     if (s->avctx->rtp_callback) {
3308         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3309         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3310         /* Call the RTP callback to send the last GOB */
3311         emms_c();
3312         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3313     }
3314 FF_ENABLE_DEPRECATION_WARNINGS
3315 #endif
3316
3317     return 0;
3318 }
3319
/*
 * Add src->field to dst->field and reset src->field to zero.
 * Expects pointers named dst and src in the calling scope.
 * The expansion is wrapped in do { } while (0) so that both assignments
 * always form one statement, e.g. as the body of an unbraced if/else.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3321 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3322     MERGE(me.scene_change_score);
3323     MERGE(me.mc_mb_var_sum_temp);
3324     MERGE(me.mb_var_sum_temp);
3325 }
3326
3327 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3328     int i;
3329
3330     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3331     MERGE(dct_count[1]);
3332     MERGE(mv_bits);
3333     MERGE(i_tex_bits);
3334     MERGE(p_tex_bits);
3335     MERGE(i_count);
3336     MERGE(f_count);
3337     MERGE(b_count);
3338     MERGE(skip_count);
3339     MERGE(misc_bits);
3340     MERGE(er.error_count);
3341     MERGE(padding_bug_score);
3342     MERGE(current_picture.encoding_error[0]);
3343     MERGE(current_picture.encoding_error[1]);
3344     MERGE(current_picture.encoding_error[2]);
3345
3346     if (dst->noise_reduction){
3347         for(i=0; i<64; i++){
3348             MERGE(dct_error_sum[0][i]);
3349             MERGE(dct_error_sum[1][i]);
3350         }
3351     }
3352
3353     assert(put_bits_count(&src->pb) % 8 ==0);
3354     assert(put_bits_count(&dst->pb) % 8 ==0);
3355     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3356     flush_put_bits(&dst->pb);
3357 }
3358
3359 static int estimate_qp(MpegEncContext *s, int dry_run){
3360     if (s->next_lambda){
3361         s->current_picture_ptr->f->quality =
3362         s->current_picture.f->quality = s->next_lambda;
3363         if(!dry_run) s->next_lambda= 0;
3364     } else if (!s->fixed_qscale) {
3365         s->current_picture_ptr->f->quality =
3366         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3367         if (s->current_picture.f->quality < 0)
3368             return -1;
3369     }
3370
3371     if(s->adaptive_quant){
3372         switch(s->codec_id){
3373         case AV_CODEC_ID_MPEG4:
3374             if (CONFIG_MPEG4_ENCODER)
3375                 ff_clean_mpeg4_qscales(s);
3376             break;
3377         case AV_CODEC_ID_H263:
3378         case AV_CODEC_ID_H263P:
3379         case AV_CODEC_ID_FLV1:
3380             if (CONFIG_H263_ENCODER)
3381                 ff_clean_h263_qscales(s);
3382             break;
3383         default:
3384             ff_init_qscale_tab(s);
3385         }
3386
3387         s->lambda= s->lambda_table[0];
3388         //FIXME broken
3389     }else
3390         s->lambda = s->current_picture.f->quality;
3391     update_qscale(s);
3392     return 0;
3393 }
3394
3395 /* must be called before writing the header */
3396 static void set_frame_distances(MpegEncContext * s){
3397     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3398     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3399
3400     if(s->pict_type==AV_PICTURE_TYPE_B){
3401         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3402         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3403     }else{
3404         s->pp_time= s->time - s->last_non_b_time;
3405         s->last_non_b_time= s->time;
3406         assert(s->picture_number==0 || s->pp_time > 0);
3407     }
3408 }
3409
3410 static int encode_picture(MpegEncContext *s, int picture_number)
3411 {
3412     int i, ret;
3413     int bits;
3414     int context_count = s->slice_context_count;
3415
3416     s->picture_number = picture_number;
3417
3418     /* Reset the average MB variance */
3419     s->me.mb_var_sum_temp    =
3420     s->me.mc_mb_var_sum_temp = 0;
3421
3422     /* we need to initialize some time vars before we can encode b-frames */
3423     // RAL: Condition added for MPEG1VIDEO
3424     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3425         set_frame_distances(s);
3426     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3427         ff_set_mpeg4_time(s);
3428
3429     s->me.scene_change_score=0;
3430
3431 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3432
3433     if(s->pict_type==AV_PICTURE_TYPE_I){
3434         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3435         else                        s->no_rounding=0;
3436     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3437         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3438             s->no_rounding ^= 1;
3439     }
3440
3441     if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3442         if (estimate_qp(s,1) < 0)
3443             return -1;
3444         ff_get_2pass_fcode(s);
3445     } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3446         if(s->pict_type==AV_PICTURE_TYPE_B)
3447             s->lambda= s->last_lambda_for[s->pict_type];
3448         else
3449             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3450         update_qscale(s);
3451     }
3452
3453     s->mb_intra=0; //for the rate distortion & bit compare functions
3454     for(i=1; i<context_count; i++){
3455         ret = ff_update_duplicate_context(s->thread_context[i], s);
3456         if (ret < 0)
3457             return ret;
3458     }
3459
3460     if(ff_init_me(s)<0)
3461         return -1;
3462
3463     /* Estimate motion for every MB */
3464     if(s->pict_type != AV_PICTURE_TYPE_I){
3465         s->lambda  = (s->lambda  * s->me_penalty_compensation + 128) >> 8;
3466         s->lambda2 = (s->lambda2 * (int64_t) s->me_penalty_compensation + 128) >> 8;
3467         if (s->pict_type != AV_PICTURE_TYPE_B) {
3468             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3469                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3470             }
3471         }
3472
3473         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3474     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3475         /* I-Frame */
3476         for(i=0; i<s->mb_stride*s->mb_height; i++)
3477             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3478
3479         if(!s->fixed_qscale){
3480             /* finding spatial complexity for I-frame rate control */
3481             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3482         }
3483     }
3484     for(i=1; i<context_count; i++){
3485         merge_context_after_me(s, s->thread_context[i]);
3486     }
3487     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3488     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3489     emms_c();
3490
3491     if (s->me.scene_change_score > s->scenechange_threshold &&
3492         s->pict_type == AV_PICTURE_TYPE_P) {
3493         s->pict_type= AV_PICTURE_TYPE_I;
3494         for(i=0; i<s->mb_stride*s->mb_height; i++)
3495             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3496         ff_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3497                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3498     }
3499
3500     if(!s->umvplus){
3501         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3502             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3503
3504             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3505                 int a,b;
3506                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3507                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3508                 s->f_code= FFMAX3(s->f_code, a, b);
3509             }
3510
3511             ff_fix_long_p_mvs(s);
3512             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3513             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3514                 int j;
3515                 for(i=0; i<2; i++){
3516                     for(j=0; j<2; j++)
3517                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3518                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3519                 }
3520             }
3521         }
3522
3523         if(s->pict_type==AV_PICTURE_TYPE_B){
3524             int a, b;
3525
3526             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3527             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3528             s->f_code = FFMAX(a, b);
3529
3530             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3531             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3532             s->b_code = FFMAX(a, b);
3533
3534             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3535             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3536             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3537             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3538             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3539                 int dir, j;
3540                 for(dir=0; dir<2; dir++){
3541                     for(i=0; i<2; i++){
3542                         for(j=0; j<2; j++){
3543                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3544                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3545                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3546                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3547                         }
3548                     }
3549                 }
3550             }
3551         }
3552     }
3553
3554     if (estimate_qp(s, 0) < 0)
3555         return -1;
3556
3557     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3558         s->pict_type == AV_PICTURE_TYPE_I &&
3559         !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3560         s->qscale= 3; //reduce clipping problems
3561
3562     if (s->out_format == FMT_MJPEG) {
3563         /* for mjpeg, we do include qscale in the matrix */
3564         for(i=1;i<64;i++){
3565             int j = s->idsp.idct_permutation[i];
3566
3567             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3568         }
3569         s->y_dc_scale_table=
3570         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3571         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3572         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3573                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3574         s->qscale= 8;
3575     }
3576
3577     //FIXME var duplication
3578     s->current_picture_ptr->f->key_frame =
3579     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3580     s->current_picture_ptr->f->pict_type =
3581     s->current_picture.f->pict_type = s->pict_type;
3582
3583     if (s->current_picture.f->key_frame)
3584         s->picture_in_gop_number=0;
3585
3586     s->last_bits= put_bits_count(&s->pb);
3587     switch(s->out_format) {
3588     case FMT_MJPEG:
3589         if (CONFIG_MJPEG_ENCODER)
3590             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3591                                            s->pred, s->intra_matrix);
3592         break;
3593     case FMT_H261:
3594         if (CONFIG_H261_ENCODER)
3595             ff_h261_encode_picture_header(s, picture_number);
3596         break;
3597     case FMT_H263:
3598         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3599             ff_wmv2_encode_picture_header(s, picture_number);
3600         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3601             ff_msmpeg4_encode_picture_header(s, picture_number);
3602         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3603             ff_mpeg4_encode_picture_header(s, picture_number);
3604         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3605             ret = ff_rv10_encode_picture_header(s, picture_number);
3606             if (ret < 0)
3607                 return ret;
3608         }
3609         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3610             ff_rv20_encode_picture_header(s, picture_number);
3611         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3612             ff_flv_encode_picture_header(s, picture_number);
3613         else if (CONFIG_H263_ENCODER)
3614             ff_h263_encode_picture_header(s, picture_number);
3615         break;
3616     case FMT_MPEG1:
3617         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3618             ff_mpeg1_encode_picture_header(s, picture_number);
3619         break;
3620     default:
3621         assert(0);
3622     }
3623     bits= put_bits_count(&s->pb);
3624     s->header_bits= bits - s->last_bits;
3625
3626     for(i=1; i<context_count; i++){
3627         update_duplicate_context_after_me(s->thread_context[i], s);
3628     }
3629     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3630     for(i=1; i<context_count; i++){
3631         merge_context_after_encode(s, s->thread_context[i]);
3632     }
3633     emms_c();
3634     return 0;
3635 }
3636
3637 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3638     const int intra= s->mb_intra;
3639     int i;
3640
3641     s->dct_count[intra]++;
3642
3643     for(i=0; i<64; i++){
3644         int level= block[i];
3645
3646         if(level){
3647             if(level>0){
3648                 s->dct_error_sum[intra][i] += level;
3649                 level -= s->dct_offset[intra][i];
3650                 if(level<0) level=0;
3651             }else{
3652                 s->dct_error_sum[intra][i] -= level;
3653                 level += s->dct_offset[intra][i];
3654                 if(level>0) level=0;
3655             }
3656             block[i]= level;
3657         }
3658     }
3659 }
3660
3661 static int dct_quantize_trellis_c(MpegEncContext *s,
3662                                   int16_t *block, int n,
3663                                   int qscale, int *overflow){
3664     const int *qmat;
3665     const uint8_t *scantable= s->intra_scantable.scantable;
3666     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3667     int max=0;
3668     unsigned int threshold1, threshold2;
3669     int bias=0;
3670     int run_tab[65];
3671     int level_tab[65];
3672     int score_tab[65];
3673     int survivor[65];
3674     int survivor_count;
3675     int last_run=0;
3676     int last_level=0;
3677     int last_score= 0;
3678     int last_i;
3679     int coeff[2][64];
3680     int coeff_count[64];
3681     int qmul, qadd, start_i, last_non_zero, i, dc;
3682     const int esc_length= s->ac_esc_length;
3683     uint8_t * length;
3684     uint8_t * last_length;
3685     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3686
3687     s->fdsp.fdct(block);
3688
3689     if(s->dct_error_sum)
3690         s->denoise_dct(s, block);
3691     qmul= qscale*16;
3692     qadd= ((qscale-1)|1)*8;
3693
3694     if (s->mb_intra) {
3695         int q;
3696         if (!s->h263_aic) {
3697             if (n < 4)
3698                 q = s->y_dc_scale;
3699             else
3700                 q = s->c_dc_scale;
3701             q = q << 3;
3702         } else{
3703             /* For AIC we skip quant/dequant of INTRADC */
3704             q = 1 << 3;
3705             qadd=0;
3706         }
3707
3708         /* note: block[0] is assumed to be positive */
3709         block[0] = (block[0] + (q >> 1)) / q;
3710         start_i = 1;
3711         last_non_zero = 0;
3712         qmat = s->q_intra_matrix[qscale];
3713         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3714             bias= 1<<(QMAT_SHIFT-1);
3715         length     = s->intra_ac_vlc_length;
3716         last_length= s->intra_ac_vlc_last_length;
3717     } else {
3718         start_i = 0;
3719         last_non_zero = -1;
3720         qmat = s->q_inter_matrix[qscale];
3721         length     = s->inter_ac_vlc_length;
3722         last_length= s->inter_ac_vlc_last_length;
3723     }
3724     last_i= start_i;
3725
3726     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3727     threshold2= (threshold1<<1);
3728
3729     for(i=63; i>=start_i; i--) {
3730         const int j = scantable[i];
3731         int level = block[j] * qmat[j];
3732
3733         if(((unsigned)(level+threshold1))>threshold2){
3734             last_non_zero = i;
3735             break;
3736         }
3737     }
3738
3739     for(i=start_i; i<=last_non_zero; i++) {
3740         const int j = scantable[i];
3741         int level = block[j] * qmat[j];
3742
3743 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3744 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3745         if(((unsigned)(level+threshold1))>threshold2){
3746             if(level>0){
3747                 level= (bias + level)>>QMAT_SHIFT;
3748                 coeff[0][i]= level;
3749                 coeff[1][i]= level-1;
3750 //                coeff[2][k]= level-2;
3751             }else{
3752                 level= (bias - level)>>QMAT_SHIFT;
3753                 coeff[0][i]= -level;
3754                 coeff[1][i]= -level+1;
3755 //                coeff[2][k]= -level+2;
3756             }
3757             coeff_count[i]= FFMIN(level, 2);
3758             assert(coeff_count[i]);
3759             max |=level;
3760         }else{
3761             coeff[0][i]= (level>>31)|1;
3762             coeff_count[i]= 1;
3763         }
3764     }
3765
3766     *overflow= s->max_qcoeff < max; //overflow might have happened
3767
3768     if(last_non_zero < start_i){
3769         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3770         return last_non_zero;
3771     }
3772
3773     score_tab[start_i]= 0;
3774     survivor[0]= start_i;
3775     survivor_count= 1;
3776
3777     for(i=start_i; i<=last_non_zero; i++){
3778         int level_index, j, zero_distortion;
3779         int dct_coeff= FFABS(block[ scantable[i] ]);
3780         int best_score=256*256*256*120;
3781
3782         if (s->fdsp.fdct == ff_fdct_ifast)
3783             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3784         zero_distortion= dct_coeff*dct_coeff;
3785
3786         for(level_index=0; level_index < coeff_count[i]; level_index++){
3787             int distortion;
3788             int level= coeff[level_index][i];
3789             const int alevel= FFABS(level);
3790             int unquant_coeff;
3791
3792             assert(level);
3793
3794             if(s->out_format == FMT_H263){
3795                 unquant_coeff= alevel*qmul + qadd;
3796             }else{ //MPEG1
3797                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3798                 if(s->mb_intra){
3799                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3800                         unquant_coeff =   (unquant_coeff - 1) | 1;
3801                 }else{
3802                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3803                         unquant_coeff =   (unquant_coeff - 1) | 1;
3804                 }
3805                 unquant_coeff<<= 3;
3806             }
3807
3808             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3809             level+=64;
3810             if((level&(~127)) == 0){
3811                 for(j=survivor_count-1; j>=0; j--){
3812                     int run= i - survivor[j];
3813                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3814                     score += score_tab[i-run];
3815
3816                     if(score < best_score){
3817                         best_score= score;
3818                         run_tab[i+1]= run;
3819                         level_tab[i+1]= level-64;
3820                     }
3821                 }
3822
3823                 if(s->out_format == FMT_H263){
3824                     for(j=survivor_count-1; j>=0; j--){
3825                         int run= i - survivor[j];
3826                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3827                         score += score_tab[i-run];
3828                         if(score < last_score){
3829                             last_score= score;
3830                             last_run= run;
3831                             last_level= level-64;
3832                             last_i= i+1;
3833                         }
3834                     }
3835                 }
3836             }else{
3837                 distortion += esc_length*lambda;
3838                 for(j=survivor_count-1; j>=0; j--){
3839                     int run= i - survivor[j];
3840                     int score= distortion + score_tab[i-run];
3841
3842                     if(score < best_score){
3843                         best_score= score;
3844                         run_tab[i+1]= run;
3845                         level_tab[i+1]= level-64;
3846                     }
3847                 }
3848
3849                 if(s->out_format == FMT_H263){
3850                   for(j=survivor_count-1; j>=0; j--){
3851                         int run= i - survivor[j];
3852                         int score= distortion + score_tab[i-run];
3853                         if(score < last_score){
3854                             last_score= score;
3855                             last_run= run;
3856                             last_level= level-64;
3857                             last_i= i+1;
3858                         }
3859                     }
3860                 }
3861             }
3862         }
3863
3864         score_tab[i+1]= best_score;
3865
3866         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3867         if(last_non_zero <= 27){
3868             for(; survivor_count; survivor_count--){
3869                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3870                     break;
3871             }
3872         }else{
3873             for(; survivor_count; survivor_count--){
3874                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3875                     break;
3876             }
3877         }
3878
3879         survivor[ survivor_count++ ]= i+1;
3880     }
3881
3882     if(s->out_format != FMT_H263){
3883         last_score= 256*256*256*120;
3884         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3885             int score= score_tab[i];
3886             if(i) score += lambda*2; //FIXME exacter?
3887
3888             if(score < last_score){
3889                 last_score= score;
3890                 last_i= i;
3891                 last_level= level_tab[i];
3892                 last_run= run_tab[i];
3893             }
3894         }
3895     }
3896
3897     s->coded_score[n] = last_score;
3898
3899     dc= FFABS(block[0]);
3900     last_non_zero= last_i - 1;
3901     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3902
3903     if(last_non_zero < start_i)
3904         return last_non_zero;
3905
3906     if(last_non_zero == 0 && start_i == 0){
3907         int best_level= 0;
3908         int best_score= dc * dc;
3909
3910         for(i=0; i<coeff_count[0]; i++){
3911             int level= coeff[i][0];
3912             int alevel= FFABS(level);
3913             int unquant_coeff, score, distortion;
3914
3915             if(s->out_format == FMT_H263){
3916                     unquant_coeff= (alevel*qmul + qadd)>>3;
3917             }else{ //MPEG1
3918                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3919                     unquant_coeff =   (unquant_coeff - 1) | 1;
3920             }
3921             unquant_coeff = (unquant_coeff + 4) >> 3;
3922             unquant_coeff<<= 3 + 3;
3923
3924             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3925             level+=64;
3926             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3927             else                    score= distortion + esc_length*lambda;
3928
3929             if(score < best_score){
3930                 best_score= score;
3931                 best_level= level - 64;
3932             }
3933         }
3934         block[0]= best_level;
3935         s->coded_score[n] = best_score - dc*dc;
3936         if(best_level == 0) return -1;
3937         else                return last_non_zero;
3938     }
3939
3940     i= last_i;
3941     assert(last_level);
3942
3943     block[ perm_scantable[last_non_zero] ]= last_level;
3944     i -= last_run + 1;
3945
3946     for(; i>start_i; i -= run_tab[i] + 1){
3947         block[ perm_scantable[i-1] ]= level_tab[i];
3948     }
3949
3950     return last_non_zero;
3951 }
3952
3953 //#define REFINE_STATS 1
3954 static int16_t basis[64][64];
3955
3956 static void build_basis(uint8_t *perm){
3957     int i, j, x, y;
3958     emms_c();
3959     for(i=0; i<8; i++){
3960         for(j=0; j<8; j++){
3961             for(y=0; y<8; y++){
3962                 for(x=0; x<8; x++){
3963                     double s= 0.25*(1<<BASIS_SHIFT);
3964                     int index= 8*i + j;
3965                     int perm_index= perm[index];
3966                     if(i==0) s*= sqrt(0.5);
3967                     if(j==0) s*= sqrt(0.5);
3968                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3969                 }
3970             }
3971         }
3972     }
3973 }
3974
/**
 * Iteratively refine the already quantized coefficients of one 8x8 block.
 *
 * rem[] is seeded from the DC term and orig[], then every non-zero
 * coefficient's dequantized value is added through add_8x8basis(). In each
 * pass of the main loop every candidate coefficient is tried at level-1 and
 * level+1; a candidate's score is the value returned by try_8x8basis() plus
 * lambda times the change in VLC length. The single change with the lowest
 * score is applied to block[] and rem[]; the loop ends when no change beats
 * the score of leaving the block as it is.
 * NOTE(review): try_8x8basis() presumably returns the weighted error of
 * rem[] after adding the scaled basis function -- confirm in mpegvideoencdsp.
 *
 * @param s      encoder context (VLC length tables, DSP callbacks, lambda2,
 *               quantizer_noise_shaping, DC scales, block_last_index)
 * @param block  quantized coefficients, modified in place
 * @param weight 64 per-sample weights; overwritten in place with values in
 *               the range 16..63 before being used
 * @param orig   64 samples that are scaled by 1<<RECON_SHIFT and subtracted
 *               when rem[] is initialised
 * @param n      block number; n < 4 selects y_dc_scale, otherwise
 *               c_dc_scale (intra only), and indexes s->block_last_index
 * @param qscale quantizer; qmul = 2*qscale and qadd = (qscale-1)|1
 * @return scan index of the last non-zero coefficient after refinement
 */
3975 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3976                         int16_t *block, int16_t *weight, int16_t *orig,
3977                         int n, int qscale){
3978     int16_t rem[64];
3979     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3980     const uint8_t *scantable= s->intra_scantable.scantable;
3981     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3982 //    unsigned int threshold1, threshold2;
3983 //    int bias=0;
3984     int run_tab[65];
3985     int prev_run=0;
3986     int prev_level=0;
3987     int qmul, qadd, start_i, last_non_zero, i, dc;
3988     uint8_t * length;
3989     uint8_t * last_length;
3990     int lambda;
3991     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3992 #ifdef REFINE_STATS
3993 static int count=0;
3994 static int after_last=0;
3995 static int to_zero=0;
3996 static int from_zero=0;
3997 static int raise=0;
3998 static int lower=0;
3999 static int messed_sign=0;
4000 #endif
4001
         // basis[][] is built lazily; a zero in basis[0][0] is used as the
         // "not built yet" marker
4002     if(basis[0][0] == 0)
4003         build_basis(s->idsp.idct_permutation);
4004
4005     qmul= qscale*2;
4006     qadd= (qscale-1)|1;
4007     if (s->mb_intra) {
4008         if (!s->h263_aic) {
4009             if (n < 4)
4010                 q = s->y_dc_scale;
4011             else
4012                 q = s->c_dc_scale;
4013         } else{
4014             /* For AIC we skip quant/dequant of INTRADC */
4015             q = 1;
4016             qadd=0;
4017         }
4018         q <<= RECON_SHIFT-3;
4019         /* note: block[0] is assumed to be positive */
4020         dc= block[0]*q;
4021 //        block[0] = (block[0] + (q >> 1)) / q;
             // intra: the DC coefficient is handled separately, the AC scan
             // starts at index 1
4022         start_i = 1;
4023 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4024 //            bias= 1<<(QMAT_SHIFT-1);
4025         length     = s->intra_ac_vlc_length;
4026         last_length= s->intra_ac_vlc_last_length;
4027     } else {
4028         dc= 0;
4029         start_i = 0;
4030         length     = s->inter_ac_vlc_length;
4031         last_length= s->inter_ac_vlc_last_length;
4032     }
4033     last_non_zero = s->block_last_index[n];
4034
4035 #ifdef REFINE_STATS
4036 {START_TIMER
4037 #endif
         // seed rem[] with the DC term plus a rounding offset of half a
         // RECON_SHIFT unit, minus the scaled orig[] samples
4038     dc += (1<<(RECON_SHIFT-1));
4039     for(i=0; i<64; i++){
4040         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig directly instead of copying to rem[]
4041     }
4042 #ifdef REFINE_STATS
4043 STOP_TIMER("memset rem[]")}
4044 #endif
         // remap weight[] in place to the range 16..63 and accumulate the
         // sum of squared weights, which scales lambda below
4045     sum=0;
4046     for(i=0; i<64; i++){
4047         int one= 36;
4048         int qns=4;
4049         int w;
4050
4051         w= FFABS(weight[i]) + qns*one;
4052         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4053
4054         weight[i] = w;
4055 //        w=weight[i] = (63*qns + (w/2)) / w;
4056
4057         assert(w>0);
4058         assert(w<(1<<6));
4059         sum += w*w;
4060     }
4061     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4062 #ifdef REFINE_STATS
4063 {START_TIMER
4064 #endif
         // add the dequantized value of every non-zero coefficient to rem[]
         // and record the zero run preceding each of them in run_tab[]
4065     run=0;
4066     rle_index=0;
4067     for(i=start_i; i<=last_non_zero; i++){
4068         int j= perm_scantable[i];
4069         const int level= block[j];
4070         int coeff;
4071
4072         if(level){
4073             if(level<0) coeff= qmul*level - qadd;
4074             else        coeff= qmul*level + qadd;
4075             run_tab[rle_index++]=run;
4076             run=0;
4077
4078             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4079         }else{
4080             run++;
4081         }
4082     }
4083 #ifdef REFINE_STATS
4084 if(last_non_zero>0){
4085 STOP_TIMER("init rem[]")
4086 }
4087 }
4088
4089 {START_TIMER
4090 #endif
         // main search: one coefficient change is applied per iteration
4091     for(;;){
             // baseline: score of leaving the block unchanged
4092         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4093         int best_coeff=0;
4094         int best_change=0;
4095         int run2, best_unquant_change=0, analyze_gradient;
4096 #ifdef REFINE_STATS
4097 {START_TIMER
4098 #endif
4099         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4100
4101         if(analyze_gradient){
4102 #ifdef REFINE_STATS
4103 {START_TIMER
4104 #endif
                 // d1[] = forward DCT of the weighted rem[]; its signs are
                 // used below to reject new +-1 coefficients
4105             for(i=0; i<64; i++){
4106                 int w= weight[i];
4107
4108                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4109             }
4110 #ifdef REFINE_STATS
4111 STOP_TIMER("rem*w*w")}
4112 {START_TIMER
4113 #endif
4114             s->fdsp.fdct(d1);
4115 #ifdef REFINE_STATS
4116 STOP_TIMER("dct")}
4117 #endif
4118         }
4119
             // intra only: try the DC level at -1 and +1; no bit cost is
             // added to the score here
4120         if(start_i){
4121             const int level= block[0];
4122             int change, old_coeff;
4123
4124             assert(s->mb_intra);
4125
4126             old_coeff= q*level;
4127
4128             for(change=-1; change<=1; change+=2){
4129                 int new_level= level + change;
4130                 int score, new_coeff;
4131
4132                 new_coeff= q*new_level;
4133                 if(new_coeff >= 2048 || new_coeff < 0)
4134                     continue;
4135
4136                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4137                                                   new_coeff - old_coeff);
4138                 if(score<best_score){
4139                     best_score= score;
4140                     best_coeff= 0;
4141                     best_change= change;
4142                     best_unquant_change= new_coeff - old_coeff;
4143                 }
4144             }
4145         }
4146
4147         run=0;
4148         rle_index=0;
4149         run2= run_tab[rle_index++];
4150         prev_level=0;
4151         prev_run=0;
4152
4153         for(i=start_i; i<64; i++){
4154             int j= perm_scantable[i];
4155             const int level= block[j];
4156             int change, old_coeff;
4157
                 // below noise shaping level 3 only positions up to one
                 // past the current last non-zero coefficient are tried
4158             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4159                 break;
4160
4161             if(level){
4162                 if(level<0) old_coeff= qmul*level - qadd;
4163                 else        old_coeff= qmul*level + qadd;
4164                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4165             }else{
4166                 old_coeff=0;
4167                 run2--;
4168                 assert(run2>=0 || i >= last_non_zero );
4169             }
4170
4171             for(change=-1; change<=1; change+=2){
4172                 int new_level= level + change;
4173                 int score, new_coeff, unquant_change;
4174
4175                 score=0;
                     // below noise shaping level 2 a coefficient's
                     // magnitude may only shrink
4176                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4177                    continue;
4178
4179                 if(new_level){
4180                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4181                     else            new_coeff= qmul*new_level + qadd;
4182                     if(new_coeff >= 2048 || new_coeff <= -2048)
4183                         continue;
4184                     //FIXME check for overflow
4185
4186                     if(level){
                             // non-zero -> non-zero: bit cost delta of the
                             // same run with the new level
4187                         if(level < 63 && level > -63){
4188                             if(i < last_non_zero)
4189                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4190                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4191                             else
4192                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4193                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4194                         }
4195                     }else{
                             // zero -> +-1: a new coefficient is inserted
4196                         assert(FFABS(new_level)==1);
4197
4198                         if(analyze_gradient){
4199                             int g= d1[ scantable[i] ];
                                 // skip when the gradient is non-zero and
                                 // has the same sign as the new level
4200                             if(g && (g^new_level) >= 0)
4201                                 continue;
4202                         }
4203
4204                         if(i < last_non_zero){
4205                             int next_i= i + run2 + 1;
4206                             int next_level= block[ perm_scantable[next_i] ] + 64;
4207
4208                             if(next_level&(~127))
4209                                 next_level= 0;
4210
4211                             if(next_i < last_non_zero)
4212                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4213                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4214                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4215                             else
4216                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4217                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4218                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4219                         }else{
4220                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4221                             if(prev_level){
4222                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4223                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4224                             }
4225                         }
4226                     }
4227                 }else{
                         // +-1 -> zero: the coefficient is removed and its
                         // run merges with the following one
4228                     new_coeff=0;
4229                     assert(FFABS(level)==1);
4230
4231                     if(i < last_non_zero){
4232                         int next_i= i + run2 + 1;
4233                         int next_level= block[ perm_scantable[next_i] ] + 64;
4234
4235                         if(next_level&(~127))
4236                             next_level= 0;
4237
4238                         if(next_i < last_non_zero)
4239                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4240                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4241                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4242                         else
4243                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4244                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4245                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4246                     }else{
4247                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4248                         if(prev_level){
4249                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4250                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4251                         }
4252                     }
4253                 }
4254
                     // weight the bit cost delta by lambda, then add the
                     // score of the changed reconstruction
4255                 score *= lambda;
4256
4257                 unquant_change= new_coeff - old_coeff;
4258                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4259
4260                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4261                                                    unquant_change);
4262                 if(score<best_score){
4263                     best_score= score;
4264                     best_coeff= i;
4265                     best_change= change;
4266                     best_unquant_change= unquant_change;
4267                 }
4268             }
                 // remember the previous non-zero coefficient (level biased
                 // by 64, or 0 when outside 0..127) and its run
4269             if(level){
4270                 prev_level= level + 64;
4271                 if(prev_level&(~127))
4272                     prev_level= 0;
4273                 prev_run= run;
4274                 run=0;
4275             }else{
4276                 run++;
4277             }
4278         }
4279 #ifdef REFINE_STATS
4280 STOP_TIMER("iterative step")}
4281 #endif
4282
             // apply the winning change, if any; otherwise the search is done
4283         if(best_change){
4284             int j= perm_scantable[ best_coeff ];
4285
4286             block[j] += best_change;
4287
4288             if(best_coeff > last_non_zero){
4289                 last_non_zero= best_coeff;
4290                 assert(block[j]);
4291 #ifdef REFINE_STATS
4292 after_last++;
4293 #endif
4294             }else{
4295 #ifdef REFINE_STATS
4296 if(block[j]){
4297     if(block[j] - best_change){
4298         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4299             raise++;
4300         }else{
4301             lower++;
4302         }
4303     }else{
4304         from_zero++;
4305     }
4306 }else{
4307     to_zero++;
4308 }
4309 #endif
                     // the change may have zeroed the last coefficient:
                     // walk back to the new last non-zero position
4310                 for(; last_non_zero>=start_i; last_non_zero--){
4311                     if(block[perm_scantable[last_non_zero]])
4312                         break;
4313                 }
4314             }
4315 #ifdef REFINE_STATS
4316 count++;
4317 if(256*256*256*64 % count == 0){
4318     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4319 }
4320 #endif
                 // rebuild run_tab[] for the modified block
4321             run=0;
4322             rle_index=0;
4323             for(i=start_i; i<=last_non_zero; i++){
4324                 int j= perm_scantable[i];
4325                 const int level= block[j];
4326
4327                  if(level){
4328                      run_tab[rle_index++]=run;
4329                      run=0;
4330                  }else{
4331                      run++;
4332                  }
4333             }
4334
                 // fold the applied change into rem[]; j here is the outer
                 // j, i.e. the position of the changed coefficient
4335             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4336         }else{
4337             break;
4338         }
4339     }
4340 #ifdef REFINE_STATS
4341 if(last_non_zero>0){
4342 STOP_TIMER("iterative search")
4343 }
4344 }
4345 #endif
4346
4347     return last_non_zero;
4348 }
4349
/**
 * Permute an 8x8 block according to a permutation vector.
 *
 * Only the coefficients at scan positions 0..last are moved; every other
 * entry of the block is left untouched. When last <= 0 the block is returned
 * unchanged.
 * NOTE(review): the last <= 0 shortcut presumably relies on coefficient 0
 * mapping onto itself -- confirm against the permutation tables in use.
 *
 * @param block       the block which will be permuted in place according to
 *                    the given permutation vector
 * @param permutation the permutation vector (read only): the coefficient at
 *                    index j is moved to index permutation[j]
 * @param last        the last non zero coefficient in scantable order, used
 *                    to speed the permutation up
 * @param scantable   the used scantable, this is only used to speed the
 *                    permutation up, the block is not (inverse) permuted
 *                    to scantable order!
 */
static void block_permute(int16_t *block, const uint8_t *permutation,
                          const uint8_t *scantable, int last)
{
    int16_t saved[64];
    int i;

    if (last <= 0)
        return;

    /* FIXME: returning early when permutation[1] == 1 would be OK but is not
     * clean and might fail for some permutations, so it is left disabled. */

    /* Pass 1: stash the coefficients that are going to move and clear their
     * old slots, so that slots which receive nothing end up zero. */
    for (i = 0; i <= last; i++) {
        const int pos = scantable[i];

        saved[pos] = block[pos];
        block[pos] = 0;
    }

    /* Pass 2: drop every stashed coefficient into its permuted slot. */
    for (i = 0; i <= last; i++) {
        const int pos = scantable[i];

        block[permutation[pos]] = saved[pos];
    }
}
4385
4386 int ff_dct_quantize_c(MpegEncContext *s,
4387                         int16_t *block, int n,
4388                         int qscale, int *overflow)
4389 {
4390     int i, j, level, last_non_zero, q, start_i;
4391     const int *qmat;
4392     const uint8_t *scantable= s->intra_scantable.scantable;
4393     int bias;
4394     int max=0;
4395     unsigned int threshold1, threshold2;
4396
4397     s->fdsp.fdct(block);
4398
4399     if(s->dct_error_sum)
4400         s->denoise_dct(s, block);
4401
4402     if (s->mb_intra) {
4403         if (!s->h263_aic) {
4404             if (n < 4)
4405                 q = s->y_dc_scale;
4406             else
4407                 q = s->c_dc_scale;
4408             q = q << 3;
4409         } else
4410             /* For AIC we skip quant/dequant of INTRADC */
4411             q = 1 << 3;
4412
4413         /* note: block[0] is assumed to be positive */
4414         block[0] = (block[0] + (q >> 1)) / q;
4415         start_i = 1;
4416         last_non_zero = 0;
4417         qmat = s->q_intra_matrix[qscale];
4418         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4419     } else {
4420         start_i = 0;
4421         last_non_zero = -1;
4422         qmat = s->q_inter_matrix[qscale];
4423         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4424     }
4425     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4426     threshold2= (threshold1<<1);
4427     for(i=63;i>=start_i;i--) {
4428         j = scantable[i];
4429         level = block[j] * qmat[j];
4430
4431         if(((unsigned)(level+threshold1))>threshold2){
4432             last_non_zero = i;
4433             break;
4434         }else{
4435             block[j]=0;
4436         }
4437     }
4438     for(i=start_i; i<=last_non_zero; i++) {
4439         j = scantable[i];
4440         level = block[j] * qmat[j];
4441
4442 //        if(   bias+level >= (1<<QMAT_SHIFT)
4443 //           || bias-level >= (1<<QMAT_SHIFT)){
4444         if(((unsigned)(level+threshold1))>threshold2){
4445             if(level>0){
4446                 level= (bias + level)>>QMAT_SHIFT;
4447                 block[j]= level;
4448             }else{
4449                 level= (bias - level)>>QMAT_SHIFT;
4450                 block[j]= -level;
4451             }
4452             max |=level;
4453         }else{
4454             block[j]=0;
4455         }
4456     }
4457     *overflow= s->max_qcoeff < max; //overflow might have happened
4458
4459     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4460     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4461         block_permute(block, s->idsp.idct_permutation,
4462                       scantable, last_non_zero);
4463
4464     return last_non_zero;
4465 }
4466
4467 #define OFFSET(x) offsetof(MpegEncContext, x)
4468 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4469 static const AVOption h263_options[] = {
4470     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4471     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4472     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4473     FF_MPV_COMMON_OPTS
4474     { NULL },
4475 };
4476
4477 static const AVClass h263_class = {
4478     .class_name = "H.263 encoder",
4479     .item_name  = av_default_item_name,
4480     .option     = h263_options,
4481     .version    = LIBAVUTIL_VERSION_INT,
4482 };
4483
4484 AVCodec ff_h263_encoder = {
4485     .name           = "h263",
4486     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4487     .type           = AVMEDIA_TYPE_VIDEO,
4488     .id             = AV_CODEC_ID_H263,
4489     .priv_data_size = sizeof(MpegEncContext),
4490     .init           = ff_mpv_encode_init,
4491     .encode2        = ff_mpv_encode_picture,
4492     .close          = ff_mpv_encode_end,
4493     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4494     .priv_class     = &h263_class,
4495 };
4496
4497 static const AVOption h263p_options[] = {
4498     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4499     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4500     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4501     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4502     FF_MPV_COMMON_OPTS
4503     { NULL },
4504 };
4505 static const AVClass h263p_class = {
4506     .class_name = "H.263p encoder",
4507     .item_name  = av_default_item_name,
4508     .option     = h263p_options,
4509     .version    = LIBAVUTIL_VERSION_INT,
4510 };
4511
4512 AVCodec ff_h263p_encoder = {
4513     .name           = "h263p",
4514     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4515     .type           = AVMEDIA_TYPE_VIDEO,
4516     .id             = AV_CODEC_ID_H263P,
4517     .priv_data_size = sizeof(MpegEncContext),
4518     .init           = ff_mpv_encode_init,
4519     .encode2        = ff_mpv_encode_picture,
4520     .close          = ff_mpv_encode_end,
4521     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
4522     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4523     .priv_class     = &h263p_class,
4524 };
4525
4526 static const AVClass msmpeg4v2_class = {
4527     .class_name = "msmpeg4v2 encoder",
4528     .item_name  = av_default_item_name,
4529     .option     = ff_mpv_generic_options,
4530     .version    = LIBAVUTIL_VERSION_INT,
4531 };
4532
4533 AVCodec ff_msmpeg4v2_encoder = {
4534     .name           = "msmpeg4v2",
4535     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4536     .type           = AVMEDIA_TYPE_VIDEO,
4537     .id             = AV_CODEC_ID_MSMPEG4V2,
4538     .priv_data_size = sizeof(MpegEncContext),
4539     .init           = ff_mpv_encode_init,
4540     .encode2        = ff_mpv_encode_picture,
4541     .close          = ff_mpv_encode_end,
4542     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4543     .priv_class     = &msmpeg4v2_class,
4544 };
4545
4546 static const AVClass msmpeg4v3_class = {
4547     .class_name = "msmpeg4v3 encoder",
4548     .item_name  = av_default_item_name,
4549     .option     = ff_mpv_generic_options,
4550     .version    = LIBAVUTIL_VERSION_INT,
4551 };
4552
4553 AVCodec ff_msmpeg4v3_encoder = {
4554     .name           = "msmpeg4",
4555     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4556     .type           = AVMEDIA_TYPE_VIDEO,
4557     .id             = AV_CODEC_ID_MSMPEG4V3,
4558     .priv_data_size = sizeof(MpegEncContext),
4559     .init           = ff_mpv_encode_init,
4560     .encode2        = ff_mpv_encode_picture,
4561     .close          = ff_mpv_encode_end,
4562     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4563     .priv_class     = &msmpeg4v3_class,
4564 };
4565
4566 static const AVClass wmv1_class = {
4567     .class_name = "wmv1 encoder",
4568     .item_name  = av_default_item_name,
4569     .option     = ff_mpv_generic_options,
4570     .version    = LIBAVUTIL_VERSION_INT,
4571 };
4572
4573 AVCodec ff_wmv1_encoder = {
4574     .name           = "wmv1",
4575     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4576     .type           = AVMEDIA_TYPE_VIDEO,
4577     .id             = AV_CODEC_ID_WMV1,
4578     .priv_data_size = sizeof(MpegEncContext),
4579     .init           = ff_mpv_encode_init,
4580     .encode2        = ff_mpv_encode_picture,
4581     .close          = ff_mpv_encode_end,
4582     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4583     .priv_class     = &wmv1_class,
4584 };