git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of Libav.
   9  *
  10  * Libav is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * Libav is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with Libav; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "idctdsp.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "mpegvideodata.h"
  44 #include "h261.h"
  45 #include "h263.h"
  46 #include "h263data.h"
  47 #include "mjpegenc_common.h"
  48 #include "mathops.h"
  49 #include "mpegutils.h"
  50 #include "mjpegenc.h"
  51 #include "msmpeg4.h"
  52 #include "pixblockdsp.h"
  53 #include "qpeldsp.h"
  54 #include "faandct.h"
  55 #include "thread.h"
  56 #include "aandcttab.h"
  57 #include "flv.h"
  58 #include "mpeg4video.h"
  59 #include "internal.h"
  60 #include "bytestream.h"
  61 #include "wmv2.h"
  62 #include "rv10.h"
  63 #include <limits.h>
  64
/* Number of fractional bits carried by the quantiser bias values
 * (e.g. 3 << (QUANT_BIAS_SHIFT - 3) stands for a bias of 3/8). */
#define QUANT_BIAS_SHIFT 8

/* Fixed-point shifts used by ff_convert_matrix(): QMAT_SHIFT_MMX for the
 * 16-bit qmat16 tables, QMAT_SHIFT for the int qmat tables. */
#define QMAT_SHIFT_MMX 16
#define QMAT_SHIFT 22

/* Forward declarations of file-local helpers that are referenced before
 * their definitions. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* Default tables installed by mpv_encode_defaults() as s->me.mv_penalty
 * and s->fcode_tab respectively. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

/* Option table made of the common mpegvideo encoder options
 * (FF_MPV_COMMON_OPTS) followed by the terminating { NULL } entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
  83
  84 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
  85                        uint16_t (*qmat16)[2][64],
  86                        const uint16_t *quant_matrix,
  87                        int bias, int qmin, int qmax, int intra)
  88 {
  89     FDCTDSPContext *fdsp = &s->fdsp;
  90     int qscale;
  91     int shift = 0;
  92
  93     for (qscale = qmin; qscale <= qmax; qscale++) {
  94         int i;
  95         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
  96 #if CONFIG_FAANDCT
  97             fdsp->fdct == ff_faandct            ||
  98 #endif /* CONFIG_FAANDCT */
  99             fdsp->fdct == ff_jpeg_fdct_islow_10) {
 100             for (i = 0; i < 64; i++) {
 101                 const int j = s->idsp.idct_permutation[i];
 102                 int64_t den = (int64_t) qscale * quant_matrix[j];
 103                 /* 16 <= qscale * quant_matrix[i] <= 7905
 104                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 105                  *             19952 <=              x  <= 249205026
 106                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 107                  *           3444240 >= (1 << 36) / (x) >= 275 */
 108
 109                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 110             }
 111         } else if (fdsp->fdct == ff_fdct_ifast) {
 112             for (i = 0; i < 64; i++) {
 113                 const int j = s->idsp.idct_permutation[i];
 114                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
 115                 /* 16 <= qscale * quant_matrix[i] <= 7905
 116                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 117                  *             19952 <=              x  <= 249205026
 118                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 119                  *           3444240 >= (1 << 36) / (x) >= 275 */
 120
 121                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
 122             }
 123         } else {
 124             for (i = 0; i < 64; i++) {
 125                 const int j = s->idsp.idct_permutation[i];
 126                 int64_t den = (int64_t) qscale * quant_matrix[j];
 127                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 128                  * Assume x = qscale * quant_matrix[i]
 129                  * So             16 <=              x  <= 7905
 130                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 131                  * so          32768 >= (1 << 19) / (x) >= 67 */
 132                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 133                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 134                 //                    (qscale * quant_matrix[i]);
 135                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
 136
 137                 if (qmat16[qscale][0][i] == 0 ||
 138                     qmat16[qscale][0][i] == 128 * 256)
 139                     qmat16[qscale][0][i] = 128 * 256 - 1;
 140                 qmat16[qscale][1][i] =
 141                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 142                                 qmat16[qscale][0][i]);
 143             }
 144         }
 145
 146         for (i = intra; i < 64; i++) {
 147             int64_t max = 8191;
 148             if (fdsp->fdct == ff_fdct_ifast) {
 149                 max = (8191LL * ff_aanscales[i]) >> 14;
 150             }
 151             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 152                 shift++;
 153             }
 154         }
 155     }
 156     if (shift) {
 157         av_log(NULL, AV_LOG_INFO,
 158                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 159                QMAT_SHIFT - shift);
 160     }
 161 }
 162
 163 static inline void update_qscale(MpegEncContext *s)
 164 {
 165     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 166                 (FF_LAMBDA_SHIFT + 7);
 167     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 168
 169     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 170                  FF_LAMBDA_SHIFT;
 171 }
 172
 173 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 174 {
 175     int i;
 176
 177     if (matrix) {
 178         put_bits(pb, 1, 1);
 179         for (i = 0; i < 64; i++) {
 180             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 181         }
 182     } else
 183         put_bits(pb, 1, 0);
 184 }
 185
 186 /**
 187  * init s->current_picture.qscale_table from s->lambda_table
 188  */
 189 void ff_init_qscale_tab(MpegEncContext *s)
 190 {
 191     int8_t * const qscale_table = s->current_picture.qscale_table;
 192     int i;
 193
 194     for (i = 0; i < s->mb_num; i++) {
 195         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 196         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 197         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 198                                                   s->avctx->qmax);
 199     }
 200 }
 201
 202 static void update_duplicate_context_after_me(MpegEncContext *dst,
 203                                               MpegEncContext *src)
 204 {
 205 #define COPY(a) dst->a= src->a
 206     COPY(pict_type);
 207     COPY(current_picture);
 208     COPY(f_code);
 209     COPY(b_code);
 210     COPY(qscale);
 211     COPY(lambda);
 212     COPY(lambda2);
 213     COPY(picture_in_gop_number);
 214     COPY(gop_picture_number);
 215     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 216     COPY(progressive_frame);    // FIXME don't set in encode_header
 217     COPY(partitioned_frame);    // FIXME don't set in encode_header
 218 #undef COPY
 219 }
 220
 221 /**
 222  * Set the given MpegEncContext to defaults for encoding.
 223  * the changed fields will not depend upon the prior state of the MpegEncContext.
 224  */
 225 static void mpv_encode_defaults(MpegEncContext *s)
 226 {
 227     int i;
 228     ff_mpv_common_defaults(s);
 229
 230     for (i = -16; i < 16; i++) {
 231         default_fcode_tab[i + MAX_MV] = 1;
 232     }
 233     s->me.mv_penalty = default_mv_penalty;
 234     s->fcode_tab     = default_fcode_tab;
 235
 236     s->input_picture_number  = 0;
 237     s->picture_in_gop_number = 0;
 238 }
 239
 240 /* init video encoder */
 241 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
 242 {
 243     MpegEncContext *s = avctx->priv_data;
 244     AVCPBProperties *cpb_props;
 245     int i, ret, format_supported;
 246
 247     mpv_encode_defaults(s);
 248
 249     switch (avctx->codec_id) {
 250     case AV_CODEC_ID_MPEG2VIDEO:
 251         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 252             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 253             av_log(avctx, AV_LOG_ERROR,
 254                    "only YUV420 and YUV422 are supported\n");
 255             return -1;
 256         }
 257         break;
 258     case AV_CODEC_ID_MJPEG:
 259         format_supported = 0;
 260         /* JPEG color space */
 261         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
 262             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
 263             (avctx->color_range == AVCOL_RANGE_JPEG &&
 264              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 265               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
 266             format_supported = 1;
 267         /* MPEG color space */
 268         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
 269                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 270                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
 271             format_supported = 1;
 272
 273         if (!format_supported) {
 274             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 275             return -1;
 276         }
 277         break;
 278     default:
 279         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 280             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 281             return -1;
 282         }
 283     }
 284
 285     switch (avctx->pix_fmt) {
 286     case AV_PIX_FMT_YUVJ422P:
 287     case AV_PIX_FMT_YUV422P:
 288         s->chroma_format = CHROMA_422;
 289         break;
 290     case AV_PIX_FMT_YUVJ420P:
 291     case AV_PIX_FMT_YUV420P:
 292     default:
 293         s->chroma_format = CHROMA_420;
 294         break;
 295     }
 296
 297 #if FF_API_PRIVATE_OPT
 298 FF_DISABLE_DEPRECATION_WARNINGS
 299     if (avctx->rtp_payload_size)
 300         s->rtp_payload_size = avctx->rtp_payload_size;
 301     if (avctx->me_penalty_compensation)
 302         s->me_penalty_compensation = avctx->me_penalty_compensation;
 303     if (avctx->pre_me)
 304         s->me_pre = avctx->pre_me;
 305 FF_ENABLE_DEPRECATION_WARNINGS
 306 #endif
 307
 308     s->bit_rate = avctx->bit_rate;
 309     s->width    = avctx->width;
 310     s->height   = avctx->height;
 311     if (avctx->gop_size > 600 &&
 312         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 313         av_log(avctx, AV_LOG_ERROR,
 314                "Warning keyframe interval too large! reducing it ...\n");
 315         avctx->gop_size = 600;
 316     }
 317     s->gop_size     = avctx->gop_size;
 318     s->avctx        = avctx;
 319     if (avctx->max_b_frames > MAX_B_FRAMES) {
 320         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 321                "is %d.\n", MAX_B_FRAMES);
 322     }
 323     s->max_b_frames = avctx->max_b_frames;
 324     s->codec_id     = avctx->codec->id;
 325     s->strict_std_compliance = avctx->strict_std_compliance;
 326     s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
 327     s->rtp_mode           = !!s->rtp_payload_size;
 328     s->intra_dc_precision = avctx->intra_dc_precision;
 329     s->user_specified_pts = AV_NOPTS_VALUE;
 330
 331     if (s->gop_size <= 1) {
 332         s->intra_only = 1;
 333         s->gop_size   = 12;
 334     } else {
 335         s->intra_only = 0;
 336     }
 337
 338 #if FF_API_MOTION_EST
 339 FF_DISABLE_DEPRECATION_WARNINGS
 340     s->me_method = avctx->me_method;
 341 FF_ENABLE_DEPRECATION_WARNINGS
 342 #endif
 343
 344     /* Fixed QSCALE */
 345     s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
 346
 347 #if FF_API_MPV_OPT
 348     FF_DISABLE_DEPRECATION_WARNINGS
 349     if (avctx->border_masking != 0.0)
 350         s->border_masking = avctx->border_masking;
 351     FF_ENABLE_DEPRECATION_WARNINGS
 352 #endif
 353
 354     s->adaptive_quant = (s->avctx->lumi_masking ||
 355                          s->avctx->dark_masking ||
 356                          s->avctx->temporal_cplx_masking ||
 357                          s->avctx->spatial_cplx_masking  ||
 358                          s->avctx->p_masking      ||
 359                          s->border_masking ||
 360                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 361                         !s->fixed_qscale;
 362
 363     s->loop_filter = !!(s->avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
 364
 365     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 366         av_log(avctx, AV_LOG_ERROR,
 367                "a vbv buffer size is needed, "
 368                "for encoding with a maximum bitrate\n");
 369         return -1;
 370     }
 371
 372     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 373         av_log(avctx, AV_LOG_INFO,
 374                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 375     }
 376
 377     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 378         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 379         return -1;
 380     }
 381
 382     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 383         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
 384         return -1;
 385     }
 386
 387     if (avctx->rc_max_rate &&
 388         avctx->rc_max_rate == avctx->bit_rate &&
 389         avctx->rc_max_rate != avctx->rc_min_rate) {
 390         av_log(avctx, AV_LOG_INFO,
 391                "impossible bitrate constraints, this will fail\n");
 392     }
 393
 394     if (avctx->rc_buffer_size &&
 395         avctx->bit_rate * (int64_t)avctx->time_base.num >
 396             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 397         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 398         return -1;
 399     }
 400
 401     if (!s->fixed_qscale &&
 402         avctx->bit_rate * av_q2d(avctx->time_base) >
 403             avctx->bit_rate_tolerance) {
 404         av_log(avctx, AV_LOG_ERROR,
 405                "bitrate tolerance too small for bitrate\n");
 406         return -1;
 407     }
 408
 409     if (s->avctx->rc_max_rate &&
 410         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 411         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 412          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 413         90000LL * (avctx->rc_buffer_size - 1) >
 414             s->avctx->rc_max_rate * 0xFFFFLL) {
 415         av_log(avctx, AV_LOG_INFO,
 416                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 417                "specified vbv buffer is too large for the given bitrate!\n");
 418     }
 419
 420     if ((s->avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
 421         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 422         s->codec_id != AV_CODEC_ID_FLV1) {
 423         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 424         return -1;
 425     }
 426
 427     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 428         av_log(avctx, AV_LOG_ERROR,
 429                "OBMC is only supported with simple mb decision\n");
 430         return -1;
 431     }
 432
 433     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 434         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 435         return -1;
 436     }
 437
 438     if (s->max_b_frames                    &&
 439         s->codec_id != AV_CODEC_ID_MPEG4      &&
 440         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 441         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 442         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 443         return -1;
 444     }
 445
 446     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 447          s->codec_id == AV_CODEC_ID_H263  ||
 448          s->codec_id == AV_CODEC_ID_H263P) &&
 449         (avctx->sample_aspect_ratio.num > 255 ||
 450          avctx->sample_aspect_ratio.den > 255)) {
 451         av_log(avctx, AV_LOG_ERROR,
 452                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
 453                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 454         return -1;
 455     }
 456
 457     if ((s->avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
 458         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 459         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 460         return -1;
 461     }
 462
 463 #if FF_API_PRIVATE_OPT
 464     FF_DISABLE_DEPRECATION_WARNINGS
 465     if (avctx->mpeg_quant)
 466         s->mpeg_quant = avctx->mpeg_quant;
 467     FF_ENABLE_DEPRECATION_WARNINGS
 468 #endif
 469
 470     // FIXME mpeg2 uses that too
 471     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
 472         av_log(avctx, AV_LOG_ERROR,
 473                "mpeg2 style quantization not supported by codec\n");
 474         return -1;
 475     }
 476
 477     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 478         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 479         return -1;
 480     }
 481
 482     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 483         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 484         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 485         return -1;
 486     }
 487
 488 #if FF_API_PRIVATE_OPT
 489 FF_DISABLE_DEPRECATION_WARNINGS
 490     if (avctx->scenechange_threshold)
 491         s->scenechange_threshold = avctx->scenechange_threshold;
 492 FF_ENABLE_DEPRECATION_WARNINGS
 493 #endif
 494
 495     if (s->scenechange_threshold < 1000000000 &&
 496         (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
 497         av_log(avctx, AV_LOG_ERROR,
 498                "closed gop with scene change detection are not supported yet, "
 499                "set threshold to 1000000000\n");
 500         return -1;
 501     }
 502
 503     if (s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
 504         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 505             av_log(avctx, AV_LOG_ERROR,
 506                   "low delay forcing is only available for mpeg2\n");
 507             return -1;
 508         }
 509         if (s->max_b_frames != 0) {
 510             av_log(avctx, AV_LOG_ERROR,
 511                    "b frames cannot be used with low delay\n");
 512             return -1;
 513         }
 514     }
 515
 516     if (s->q_scale_type == 1) {
 517         if (avctx->qmax > 12) {
 518             av_log(avctx, AV_LOG_ERROR,
 519                    "non linear quant only supports qmax <= 12 currently\n");
 520             return -1;
 521         }
 522     }
 523
 524     if (avctx->slices > 1 &&
 525         (avctx->codec_id == AV_CODEC_ID_FLV1 || avctx->codec_id == AV_CODEC_ID_H261)) {
 526         av_log(avctx, AV_LOG_ERROR, "Multiple slices are not supported by this codec\n");
 527         return AVERROR(EINVAL);
 528     }
 529
 530     if (s->avctx->thread_count > 1         &&
 531         s->codec_id != AV_CODEC_ID_MPEG4      &&
 532         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 533         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 534         (s->codec_id != AV_CODEC_ID_H263P)) {
 535         av_log(avctx, AV_LOG_ERROR,
 536                "multi threaded encoding not supported by codec\n");
 537         return -1;
 538     }
 539
 540     if (s->avctx->thread_count < 1) {
 541         av_log(avctx, AV_LOG_ERROR,
 542                "automatic thread number detection not supported by codec,"
 543                "patch welcome\n");
 544         return -1;
 545     }
 546
 547     if (!avctx->time_base.den || !avctx->time_base.num) {
 548         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 549         return -1;
 550     }
 551
 552 #if FF_API_PRIVATE_OPT
 553 FF_DISABLE_DEPRECATION_WARNINGS
 554     if (avctx->b_frame_strategy)
 555         s->b_frame_strategy = avctx->b_frame_strategy;
 556     if (avctx->b_sensitivity != 40)
 557         s->b_sensitivity = avctx->b_sensitivity;
 558 FF_ENABLE_DEPRECATION_WARNINGS
 559 #endif
 560
 561     if (s->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
 562         av_log(avctx, AV_LOG_INFO,
 563                "notice: b_frame_strategy only affects the first pass\n");
 564         s->b_frame_strategy = 0;
 565     }
 566
 567     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 568     if (i > 1) {
 569         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 570         avctx->time_base.den /= i;
 571         avctx->time_base.num /= i;
 572         //return -1;
 573     }
 574
 575     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 576         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
 577         // (a + x * 3 / 8) / x
 578         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 579         s->inter_quant_bias = 0;
 580     } else {
 581         s->intra_quant_bias = 0;
 582         // (a - x / 4) / x
 583         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 584     }
 585
 586 #if FF_API_QUANT_BIAS
 587 FF_DISABLE_DEPRECATION_WARNINGS
 588     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 589         s->intra_quant_bias = avctx->intra_quant_bias;
 590     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 591         s->inter_quant_bias = avctx->inter_quant_bias;
 592 FF_ENABLE_DEPRECATION_WARNINGS
 593 #endif
 594
 595     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 596         s->avctx->time_base.den > (1 << 16) - 1) {
 597         av_log(avctx, AV_LOG_ERROR,
 598                "timebase %d/%d not supported by MPEG 4 standard, "
 599                "the maximum admitted value for the timebase denominator "
 600                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 601                (1 << 16) - 1);
 602         return -1;
 603     }
 604     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 605
 606     switch (avctx->codec->id) {
 607     case AV_CODEC_ID_MPEG1VIDEO:
 608         s->out_format = FMT_MPEG1;
 609         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
 610         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 611         break;
 612     case AV_CODEC_ID_MPEG2VIDEO:
 613         s->out_format = FMT_MPEG1;
 614         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
 615         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 616         s->rtp_mode   = 1;
 617         break;
 618     case AV_CODEC_ID_MJPEG:
 619         s->out_format = FMT_MJPEG;
 620         s->intra_only = 1; /* force intra only for jpeg */
 621         if (!CONFIG_MJPEG_ENCODER ||
 622             ff_mjpeg_encode_init(s) < 0)
 623             return -1;
 624         avctx->delay = 0;
 625         s->low_delay = 1;
 626         break;
 627     case AV_CODEC_ID_H261:
 628         if (!CONFIG_H261_ENCODER)
 629             return -1;
 630         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 631             av_log(avctx, AV_LOG_ERROR,
 632                    "The specified picture size of %dx%d is not valid for the "
 633                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 634                     s->width, s->height);
 635             return -1;
 636         }
 637         s->out_format = FMT_H261;
 638         avctx->delay  = 0;
 639         s->low_delay  = 1;
 640         s->rtp_mode   = 0; /* Sliced encoding not supported */
 641         break;
 642     case AV_CODEC_ID_H263:
 643         if (!CONFIG_H263_ENCODER)
 644         return -1;
 645         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 646                              s->width, s->height) == 8) {
 647             av_log(avctx, AV_LOG_INFO,
 648                    "The specified picture size of %dx%d is not valid for "
 649                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 650                    "352x288, 704x576, and 1408x1152."
 651                    "Try H.263+.\n", s->width, s->height);
 652             return -1;
 653         }
 654         s->out_format = FMT_H263;
 655         avctx->delay  = 0;
 656         s->low_delay  = 1;
 657         break;
 658     case AV_CODEC_ID_H263P:
 659         s->out_format = FMT_H263;
 660         s->h263_plus  = 1;
 661         /* Fx */
 662         s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
 663         s->modified_quant  = s->h263_aic;
 664         s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 665         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 666
 667         /* /Fx */
 668         /* These are just to be sure */
 669         avctx->delay = 0;
 670         s->low_delay = 1;
 671         break;
 672     case AV_CODEC_ID_FLV1:
 673         s->out_format      = FMT_H263;
 674         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 675         s->unrestricted_mv = 1;
 676         s->rtp_mode  = 0; /* don't allow GOB */
 677         avctx->delay = 0;
 678         s->low_delay = 1;
 679         break;
 680     case AV_CODEC_ID_RV10:
 681         s->out_format = FMT_H263;
 682         avctx->delay  = 0;
 683         s->low_delay  = 1;
 684         break;
 685     case AV_CODEC_ID_RV20:
 686         s->out_format      = FMT_H263;
 687         avctx->delay       = 0;
 688         s->low_delay       = 1;
 689         s->modified_quant  = 1;
 690         s->h263_aic        = 1;
 691         s->h263_plus       = 1;
 692         s->loop_filter     = 1;
 693         s->unrestricted_mv = 0;
 694         break;
 695     case AV_CODEC_ID_MPEG4:
 696         s->out_format      = FMT_H263;
 697         s->h263_pred       = 1;
 698         s->unrestricted_mv = 1;
 699         s->low_delay       = s->max_b_frames ? 0 : 1;
 700         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 701         break;
 702     case AV_CODEC_ID_MSMPEG4V2:
 703         s->out_format      = FMT_H263;
 704         s->h263_pred       = 1;
 705         s->unrestricted_mv = 1;
 706         s->msmpeg4_version = 2;
 707         avctx->delay       = 0;
 708         s->low_delay       = 1;
 709         break;
 710     case AV_CODEC_ID_MSMPEG4V3:
 711         s->out_format        = FMT_H263;
 712         s->h263_pred         = 1;
 713         s->unrestricted_mv   = 1;
 714         s->msmpeg4_version   = 3;
 715         s->flipflop_rounding = 1;
 716         avctx->delay         = 0;
 717         s->low_delay         = 1;
 718         break;
 719     case AV_CODEC_ID_WMV1:
 720         s->out_format        = FMT_H263;
 721         s->h263_pred         = 1;
 722         s->unrestricted_mv   = 1;
 723         s->msmpeg4_version   = 4;
 724         s->flipflop_rounding = 1;
 725         avctx->delay         = 0;
 726         s->low_delay         = 1;
 727         break;
 728     case AV_CODEC_ID_WMV2:
 729         s->out_format        = FMT_H263;
 730         s->h263_pred         = 1;
 731         s->unrestricted_mv   = 1;
 732         s->msmpeg4_version   = 5;
 733         s->flipflop_rounding = 1;
 734         avctx->delay         = 0;
 735         s->low_delay         = 1;
 736         break;
 737     default:
 738         return -1;
 739     }
 740
 741 #if FF_API_PRIVATE_OPT
 742     FF_DISABLE_DEPRECATION_WARNINGS
 743     if (avctx->noise_reduction)
 744         s->noise_reduction = avctx->noise_reduction;
 745     FF_ENABLE_DEPRECATION_WARNINGS
 746 #endif
 747
 748     avctx->has_b_frames = !s->low_delay;
 749
 750     s->encoding = 1;
 751
 752     s->progressive_frame    =
 753     s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
 754                                                 AV_CODEC_FLAG_INTERLACED_ME) ||
 755                                 s->alternate_scan);
 756
 757     /* init */
 758     ff_mpv_idct_init(s);
 759     if (ff_mpv_common_init(s) < 0)
 760         return -1;
 761
 762     if (ARCH_X86)
 763         ff_mpv_encode_init_x86(s);
 764
 765     ff_fdctdsp_init(&s->fdsp, avctx);
 766     ff_me_cmp_init(&s->mecc, avctx);
 767     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 768     ff_pixblockdsp_init(&s->pdsp, avctx);
 769     ff_qpeldsp_init(&s->qdsp);
 770
 771     if (s->msmpeg4_version) {
 772         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 773                           2 * 2 * (MAX_LEVEL + 1) *
 774                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 775     }
 776     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 777
 778     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 779     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 780     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 781     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 782     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 783                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 784     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 785                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 786
 787
 788     if (s->noise_reduction) {
 789         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 790                           2 * 64 * sizeof(uint16_t), fail);
 791     }
 792
 793     if (CONFIG_H263_ENCODER)
 794         ff_h263dsp_init(&s->h263dsp);
 795     if (!s->dct_quantize)
 796         s->dct_quantize = ff_dct_quantize_c;
 797     if (!s->denoise_dct)
 798         s->denoise_dct  = denoise_dct_c;
 799     s->fast_dct_quantize = s->dct_quantize;
 800     if (avctx->trellis)
 801         s->dct_quantize  = dct_quantize_trellis_c;
 802
 803     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 804         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 805
 806     if (s->slice_context_count > 1) {
 807         s->rtp_mode = 1;
 808
 809         if (avctx->codec_id == AV_CODEC_ID_H263 || avctx->codec_id == AV_CODEC_ID_H263P)
 810             s->h263_slice_structured = 1;
 811     }
 812
 813     s->quant_precision = 5;
 814
 815 #if FF_API_PRIVATE_OPT
 816 FF_DISABLE_DEPRECATION_WARNINGS
 817     if (avctx->frame_skip_threshold)
 818         s->frame_skip_threshold = avctx->frame_skip_threshold;
 819     if (avctx->frame_skip_factor)
 820         s->frame_skip_factor = avctx->frame_skip_factor;
 821     if (avctx->frame_skip_exp)
 822         s->frame_skip_exp = avctx->frame_skip_exp;
 823     if (avctx->frame_skip_cmp != FF_CMP_DCTMAX)
 824         s->frame_skip_cmp = avctx->frame_skip_cmp;
 825 FF_ENABLE_DEPRECATION_WARNINGS
 826 #endif
 827
 828     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
 829     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->frame_skip_cmp);
 830
 831     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 832         ff_h261_encode_init(s);
 833     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 834         ff_h263_encode_init(s);
 835     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 836         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
 837             return ret;
 838     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 839         && s->out_format == FMT_MPEG1)
 840         ff_mpeg1_encode_init(s);
 841
 842     /* init q matrix */
 843     for (i = 0; i < 64; i++) {
 844         int j = s->idsp.idct_permutation[i];
 845         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 846             s->mpeg_quant) {
 847             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 848             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 849         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 850             s->intra_matrix[j] =
 851             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 852         } else {
 853             /* mpeg1/2 */
 854             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 855             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 856         }
 857         if (s->avctx->intra_matrix)
 858             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 859         if (s->avctx->inter_matrix)
 860             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 861     }
 862
 863     /* precompute matrix */
 864     /* for mjpeg, we do include qscale in the matrix */
 865     if (s->out_format != FMT_MJPEG) {
 866         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
 867                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 868                           31, 1);
 869         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
 870                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 871                           31, 0);
 872     }
 873
 874     if (ff_rate_control_init(s) < 0)
 875         return -1;
 876
 877 #if FF_API_ERROR_RATE
 878     FF_DISABLE_DEPRECATION_WARNINGS
 879     if (avctx->error_rate)
 880         s->error_rate = avctx->error_rate;
 881     FF_ENABLE_DEPRECATION_WARNINGS;
 882 #endif
 883
 884 #if FF_API_NORMALIZE_AQP
 885     FF_DISABLE_DEPRECATION_WARNINGS
 886     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 887         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 888     FF_ENABLE_DEPRECATION_WARNINGS;
 889 #endif
 890
 891 #if FF_API_MV0
 892     FF_DISABLE_DEPRECATION_WARNINGS
 893     if (avctx->flags & CODEC_FLAG_MV0)
 894         s->mpv_flags |= FF_MPV_FLAG_MV0;
 895     FF_ENABLE_DEPRECATION_WARNINGS
 896 #endif
 897
 898 #if FF_API_MPV_OPT
 899     FF_DISABLE_DEPRECATION_WARNINGS
 900     if (avctx->rc_qsquish != 0.0)
 901         s->rc_qsquish = avctx->rc_qsquish;
 902     if (avctx->rc_qmod_amp != 0.0)
 903         s->rc_qmod_amp = avctx->rc_qmod_amp;
 904     if (avctx->rc_qmod_freq)
 905         s->rc_qmod_freq = avctx->rc_qmod_freq;
 906     if (avctx->rc_buffer_aggressivity != 1.0)
 907         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
 908     if (avctx->rc_initial_cplx != 0.0)
 909         s->rc_initial_cplx = avctx->rc_initial_cplx;
 910     if (avctx->lmin)
 911         s->lmin = avctx->lmin;
 912     if (avctx->lmax)
 913         s->lmax = avctx->lmax;
 914
 915     if (avctx->rc_eq) {
 916         av_freep(&s->rc_eq);
 917         s->rc_eq = av_strdup(avctx->rc_eq);
 918         if (!s->rc_eq)
 919             return AVERROR(ENOMEM);
 920     }
 921     FF_ENABLE_DEPRECATION_WARNINGS
 922 #endif
 923
 924 #if FF_API_PRIVATE_OPT
 925     FF_DISABLE_DEPRECATION_WARNINGS
 926     if (avctx->brd_scale)
 927         s->brd_scale = avctx->brd_scale;
 928
 929     if (avctx->prediction_method)
 930         s->pred = avctx->prediction_method + 1;
 931     FF_ENABLE_DEPRECATION_WARNINGS
 932 #endif
 933
 934     if (s->b_frame_strategy == 2) {
 935         for (i = 0; i < s->max_b_frames + 2; i++) {
 936             s->tmp_frames[i] = av_frame_alloc();
 937             if (!s->tmp_frames[i])
 938                 return AVERROR(ENOMEM);
 939
 940             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 941             s->tmp_frames[i]->width  = s->width  >> s->brd_scale;
 942             s->tmp_frames[i]->height = s->height >> s->brd_scale;
 943
 944             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 945             if (ret < 0)
 946                 return ret;
 947         }
 948     }
 949
 950     cpb_props = ff_add_cpb_side_data(avctx);
 951     if (!cpb_props)
 952         return AVERROR(ENOMEM);
 953     cpb_props->max_bitrate = avctx->rc_max_rate;
 954     cpb_props->min_bitrate = avctx->rc_min_rate;
 955     cpb_props->avg_bitrate = avctx->bit_rate;
 956     cpb_props->buffer_size = avctx->rc_buffer_size;
 957
 958     return 0;
 959 fail:
 960     ff_mpv_encode_end(avctx);
 961     return AVERROR_UNKNOWN;
 962 }
 963
 964 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
 965 {
 966     MpegEncContext *s = avctx->priv_data;
 967     int i;
 968
 969     ff_rate_control_uninit(s);
 970
 971     ff_mpv_common_end(s);
 972     if (CONFIG_MJPEG_ENCODER &&
 973         s->out_format == FMT_MJPEG)
 974         ff_mjpeg_encode_close(s);
 975
 976     av_freep(&avctx->extradata);
 977
 978     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
 979         av_frame_free(&s->tmp_frames[i]);
 980
 981     ff_free_picture_tables(&s->new_picture);
 982     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
 983
 984     av_freep(&s->avctx->stats_out);
 985     av_freep(&s->ac_stats);
 986
 987     av_freep(&s->q_intra_matrix);
 988     av_freep(&s->q_inter_matrix);
 989     av_freep(&s->q_intra_matrix16);
 990     av_freep(&s->q_inter_matrix16);
 991     av_freep(&s->input_picture);
 992     av_freep(&s->reordered_input_picture);
 993     av_freep(&s->dct_offset);
 994
 995     return 0;
 996 }
 997
 998 static int get_sae(uint8_t *src, int ref, int stride)
 999 {
1000     int x,y;
1001     int acc = 0;
1002
1003     for (y = 0; y < 16; y++) {
1004         for (x = 0; x < 16; x++) {
1005             acc += FFABS(src[x + y * stride] - ref);
1006         }
1007     }
1008
1009     return acc;
1010 }
1011
1012 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1013                            uint8_t *ref, int stride)
1014 {
1015     int x, y, w, h;
1016     int acc = 0;
1017
1018     w = s->width  & ~15;
1019     h = s->height & ~15;
1020
1021     for (y = 0; y < h; y += 16) {
1022         for (x = 0; x < w; x += 16) {
1023             int offset = x + y * stride;
1024             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1025                                       stride, 16);
1026             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1027             int sae  = get_sae(src + offset, mean, stride);
1028
1029             acc += sae + 500 < sad;
1030         }
1031     }
1032     return acc;
1033 }
1034
1035 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1036 {
1037     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1038                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1039                             s->mb_stride, s->mb_height, s->b8_stride,
1040                             &s->linesize, &s->uvlinesize);
1041 }
1042
1043 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1044 {
1045     Picture *pic = NULL;
1046     int64_t pts;
1047     int i, display_picture_number = 0, ret;
1048     int encoding_delay = s->max_b_frames ? s->max_b_frames
1049                                          : (s->low_delay ? 0 : 1);
1050     int flush_offset = 1;
1051     int direct = 1;
1052
1053     if (pic_arg) {
1054         pts = pic_arg->pts;
1055         display_picture_number = s->input_picture_number++;
1056
1057         if (pts != AV_NOPTS_VALUE) {
1058             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1059                 int64_t time = pts;
1060                 int64_t last = s->user_specified_pts;
1061
1062                 if (time <= last) {
1063                     av_log(s->avctx, AV_LOG_ERROR,
1064                            "Error, Invalid timestamp=%"PRId64", "
1065                            "last=%"PRId64"\n", pts, s->user_specified_pts);
1066                     return -1;
1067                 }
1068
1069                 if (!s->low_delay && display_picture_number == 1)
1070                     s->dts_delta = time - last;
1071             }
1072             s->user_specified_pts = pts;
1073         } else {
1074             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1075                 s->user_specified_pts =
1076                 pts = s->user_specified_pts + 1;
1077                 av_log(s->avctx, AV_LOG_INFO,
1078                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1079                        pts);
1080             } else {
1081                 pts = display_picture_number;
1082             }
1083         }
1084
1085         if (!pic_arg->buf[0] ||
1086             pic_arg->linesize[0] != s->linesize ||
1087             pic_arg->linesize[1] != s->uvlinesize ||
1088             pic_arg->linesize[2] != s->uvlinesize)
1089             direct = 0;
1090         if ((s->width & 15) || (s->height & 15))
1091             direct = 0;
1092
1093         ff_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
1094                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1095
1096         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1097         if (i < 0)
1098             return i;
1099
1100         pic = &s->picture[i];
1101         pic->reference = 3;
1102
1103         if (direct) {
1104             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1105                 return ret;
1106         }
1107         ret = alloc_picture(s, pic, direct);
1108         if (ret < 0)
1109             return ret;
1110
1111         if (!direct) {
1112             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1113                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1114                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1115                 // empty
1116             } else {
1117                 int h_chroma_shift, v_chroma_shift;
1118                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1119                                                  &h_chroma_shift,
1120                                                  &v_chroma_shift);
1121
1122                 for (i = 0; i < 3; i++) {
1123                     int src_stride = pic_arg->linesize[i];
1124                     int dst_stride = i ? s->uvlinesize : s->linesize;
1125                     int h_shift = i ? h_chroma_shift : 0;
1126                     int v_shift = i ? v_chroma_shift : 0;
1127                     int w = s->width  >> h_shift;
1128                     int h = s->height >> v_shift;
1129                     uint8_t *src = pic_arg->data[i];
1130                     uint8_t *dst = pic->f->data[i];
1131
1132                     if (!s->avctx->rc_buffer_size)
1133                         dst += INPLACE_OFFSET;
1134
1135                     if (src_stride == dst_stride)
1136                         memcpy(dst, src, src_stride * h);
1137                     else {
1138                         int h2 = h;
1139                         uint8_t *dst2 = dst;
1140                         while (h2--) {
1141                             memcpy(dst2, src, w);
1142                             dst2 += dst_stride;
1143                             src += src_stride;
1144                         }
1145                     }
1146                     if ((s->width & 15) || (s->height & 15)) {
1147                         s->mpvencdsp.draw_edges(dst, dst_stride,
1148                                                 w, h,
1149                                                 16 >> h_shift,
1150                                                 16 >> v_shift,
1151                                                 EDGE_BOTTOM);
1152                     }
1153                 }
1154             }
1155         }
1156         ret = av_frame_copy_props(pic->f, pic_arg);
1157         if (ret < 0)
1158             return ret;
1159
1160         pic->f->display_picture_number = display_picture_number;
1161         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1162     } else {
1163         /* Flushing: When we have not received enough input frames,
1164          * ensure s->input_picture[0] contains the first picture */
1165         for (flush_offset = 0; flush_offset < encoding_delay + 1; flush_offset++)
1166             if (s->input_picture[flush_offset])
1167                 break;
1168
1169         if (flush_offset <= 1)
1170             flush_offset = 1;
1171         else
1172             encoding_delay = encoding_delay - flush_offset + 1;
1173     }
1174
1175     /* shift buffer entries */
1176     for (i = flush_offset; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1177         s->input_picture[i - flush_offset] = s->input_picture[i];
1178
1179     s->input_picture[encoding_delay] = (Picture*) pic;
1180
1181     return 0;
1182 }
1183
1184 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1185 {
1186     int x, y, plane;
1187     int score = 0;
1188     int64_t score64 = 0;
1189
1190     for (plane = 0; plane < 3; plane++) {
1191         const int stride = p->f->linesize[plane];
1192         const int bw = plane ? 1 : 2;
1193         for (y = 0; y < s->mb_height * bw; y++) {
1194             for (x = 0; x < s->mb_width * bw; x++) {
1195                 int off = p->shared ? 0 : 16;
1196                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1197                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1198                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1199
1200                 switch (s->frame_skip_exp) {
1201                 case 0: score    =  FFMAX(score, v);          break;
1202                 case 1: score   += FFABS(v);                  break;
1203                 case 2: score   += v * v;                     break;
1204                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1205                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1206                 }
1207             }
1208         }
1209     }
1210
1211     if (score)
1212         score64 = score;
1213
1214     if (score64 < s->frame_skip_threshold)
1215         return 1;
1216     if (score64 < ((s->frame_skip_factor * (int64_t) s->lambda) >> 8))
1217         return 1;
1218     return 0;
1219 }
1220
1221 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1222 {
1223     AVPacket pkt = { 0 };
1224     int ret, got_output;
1225
1226     av_init_packet(&pkt);
1227     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1228     if (ret < 0)
1229         return ret;
1230
1231     ret = pkt.size;
1232     av_packet_unref(&pkt);
1233     return ret;
1234 }
1235
1236 static int estimate_best_b_count(MpegEncContext *s)
1237 {
1238     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1239     AVCodecContext *c = avcodec_alloc_context3(NULL);
1240     const int scale = s->brd_scale;
1241     int i, j, out_size, p_lambda, b_lambda, lambda2;
1242     int64_t best_rd  = INT64_MAX;
1243     int best_b_count = -1;
1244
1245     if (!c)
1246         return AVERROR(ENOMEM);
1247     assert(scale >= 0 && scale <= 3);
1248
1249     //emms_c();
1250     //s->next_picture_ptr->quality;
1251     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1252     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1253     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1254     if (!b_lambda) // FIXME we should do this somewhere else
1255         b_lambda = p_lambda;
1256     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1257                FF_LAMBDA_SHIFT;
1258
1259     c->width        = s->width  >> scale;
1260     c->height       = s->height >> scale;
1261     c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1262     c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1263     c->mb_decision  = s->avctx->mb_decision;
1264     c->me_cmp       = s->avctx->me_cmp;
1265     c->mb_cmp       = s->avctx->mb_cmp;
1266     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1267     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1268     c->time_base    = s->avctx->time_base;
1269     c->max_b_frames = s->max_b_frames;
1270
1271     if (avcodec_open2(c, codec, NULL) < 0)
1272         return -1;
1273
1274     for (i = 0; i < s->max_b_frames + 2; i++) {
1275         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1276                                                 s->next_picture_ptr;
1277
1278         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1279             pre_input = *pre_input_ptr;
1280
1281             if (!pre_input.shared && i) {
1282                 pre_input.f->data[0] += INPLACE_OFFSET;
1283                 pre_input.f->data[1] += INPLACE_OFFSET;
1284                 pre_input.f->data[2] += INPLACE_OFFSET;
1285             }
1286
1287             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1288                                        s->tmp_frames[i]->linesize[0],
1289                                        pre_input.f->data[0],
1290                                        pre_input.f->linesize[0],
1291                                        c->width, c->height);
1292             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1293                                        s->tmp_frames[i]->linesize[1],
1294                                        pre_input.f->data[1],
1295                                        pre_input.f->linesize[1],
1296                                        c->width >> 1, c->height >> 1);
1297             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1298                                        s->tmp_frames[i]->linesize[2],
1299                                        pre_input.f->data[2],
1300                                        pre_input.f->linesize[2],
1301                                        c->width >> 1, c->height >> 1);
1302         }
1303     }
1304
1305     for (j = 0; j < s->max_b_frames + 1; j++) {
1306         int64_t rd = 0;
1307
1308         if (!s->input_picture[j])
1309             break;
1310
1311         c->error[0] = c->error[1] = c->error[2] = 0;
1312
1313         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1314         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1315
1316         out_size = encode_frame(c, s->tmp_frames[0]);
1317
1318         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1319
1320         for (i = 0; i < s->max_b_frames + 1; i++) {
1321             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1322
1323             s->tmp_frames[i + 1]->pict_type = is_p ?
1324                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1325             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1326
1327             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1328
1329             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1330         }
1331
1332         /* get the delayed frames */
1333         while (out_size) {
1334             out_size = encode_frame(c, NULL);
1335             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1336         }
1337
1338         rd += c->error[0] + c->error[1] + c->error[2];
1339
1340         if (rd < best_rd) {
1341             best_rd = rd;
1342             best_b_count = j;
1343         }
1344     }
1345
1346     avcodec_close(c);
1347     av_freep(&c);
1348
1349     return best_b_count;
1350 }
1351
1352 static int select_input_picture(MpegEncContext *s)
1353 {
1354     int i, ret;
1355
1356     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1357         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1358     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1359
1360     /* set next picture type & ordering */
1361     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1362         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1363             !s->next_picture_ptr || s->intra_only) {
1364             s->reordered_input_picture[0] = s->input_picture[0];
1365             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1366             s->reordered_input_picture[0]->f->coded_picture_number =
1367                 s->coded_picture_number++;
1368         } else {
1369             int b_frames = 0;
1370
1371             if (s->frame_skip_threshold || s->frame_skip_factor) {
1372                 if (s->picture_in_gop_number < s->gop_size &&
1373                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1374                     // FIXME check that te gop check above is +-1 correct
1375                     av_frame_unref(s->input_picture[0]->f);
1376
1377                     emms_c();
1378                     ff_vbv_update(s, 0);
1379
1380                     goto no_output_pic;
1381                 }
1382             }
1383
1384             if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1385                 for (i = 0; i < s->max_b_frames + 1; i++) {
1386                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1387
1388                     if (pict_num >= s->rc_context.num_entries)
1389                         break;
1390                     if (!s->input_picture[i]) {
1391                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1392                         break;
1393                     }
1394
1395                     s->input_picture[i]->f->pict_type =
1396                         s->rc_context.entry[pict_num].new_pict_type;
1397                 }
1398             }
1399
1400             if (s->b_frame_strategy == 0) {
1401                 b_frames = s->max_b_frames;
1402                 while (b_frames && !s->input_picture[b_frames])
1403                     b_frames--;
1404             } else if (s->b_frame_strategy == 1) {
1405                 for (i = 1; i < s->max_b_frames + 1; i++) {
1406                     if (s->input_picture[i] &&
1407                         s->input_picture[i]->b_frame_score == 0) {
1408                         s->input_picture[i]->b_frame_score =
1409                             get_intra_count(s,
1410                                             s->input_picture[i    ]->f->data[0],
1411                                             s->input_picture[i - 1]->f->data[0],
1412                                             s->linesize) + 1;
1413                     }
1414                 }
1415                 for (i = 0; i < s->max_b_frames + 1; i++) {
1416                     if (!s->input_picture[i] ||
1417                         s->input_picture[i]->b_frame_score - 1 >
1418                             s->mb_num / s->b_sensitivity)
1419                         break;
1420                 }
1421
1422                 b_frames = FFMAX(0, i - 1);
1423
1424                 /* reset scores */
1425                 for (i = 0; i < b_frames + 1; i++) {
1426                     s->input_picture[i]->b_frame_score = 0;
1427                 }
1428             } else if (s->b_frame_strategy == 2) {
1429                 b_frames = estimate_best_b_count(s);
1430             }
1431
1432             emms_c();
1433
1434             for (i = b_frames - 1; i >= 0; i--) {
1435                 int type = s->input_picture[i]->f->pict_type;
1436                 if (type && type != AV_PICTURE_TYPE_B)
1437                     b_frames = i;
1438             }
1439             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1440                 b_frames == s->max_b_frames) {
1441                 av_log(s->avctx, AV_LOG_ERROR,
1442                        "warning, too many b frames in a row\n");
1443             }
1444
1445             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1446                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1447                     s->gop_size > s->picture_in_gop_number) {
1448                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1449                 } else {
1450                     if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1451                         b_frames = 0;
1452                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1453                 }
1454             }
1455
1456             if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1457                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1458                 b_frames--;
1459
1460             s->reordered_input_picture[0] = s->input_picture[b_frames];
1461             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1462                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1463             s->reordered_input_picture[0]->f->coded_picture_number =
1464                 s->coded_picture_number++;
1465             for (i = 0; i < b_frames; i++) {
1466                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1467                 s->reordered_input_picture[i + 1]->f->pict_type =
1468                     AV_PICTURE_TYPE_B;
1469                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1470                     s->coded_picture_number++;
1471             }
1472         }
1473     }
1474 no_output_pic:
1475     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1476
1477     if (s->reordered_input_picture[0]) {
1478         s->reordered_input_picture[0]->reference =
1479            s->reordered_input_picture[0]->f->pict_type !=
1480                AV_PICTURE_TYPE_B ? 3 : 0;
1481
1482         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1483             return ret;
1484
1485         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1486             // input is a shared pix, so we can't modifiy it -> alloc a new
1487             // one & ensure that the shared one is reuseable
1488
1489             Picture *pic;
1490             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1491             if (i < 0)
1492                 return i;
1493             pic = &s->picture[i];
1494
1495             pic->reference = s->reordered_input_picture[0]->reference;
1496             if (alloc_picture(s, pic, 0) < 0) {
1497                 return -1;
1498             }
1499
1500             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1501             if (ret < 0)
1502                 return ret;
1503
1504             /* mark us unused / free shared pic */
1505             av_frame_unref(s->reordered_input_picture[0]->f);
1506             s->reordered_input_picture[0]->shared = 0;
1507
1508             s->current_picture_ptr = pic;
1509         } else {
1510             // input is not a shared pix -> reuse buffer for current_pix
1511             s->current_picture_ptr = s->reordered_input_picture[0];
1512             for (i = 0; i < 4; i++) {
1513                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1514             }
1515         }
1516         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1517         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1518                                        s->current_picture_ptr)) < 0)
1519             return ret;
1520
1521         s->picture_number = s->new_picture.f->display_picture_number;
1522     }
1523     return 0;
1524 }
1525
1526 static void frame_end(MpegEncContext *s)
1527 {
1528     int i;
1529
1530     if (s->unrestricted_mv &&
1531         s->current_picture.reference &&
1532         !s->intra_only) {
1533         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1534         int hshift = desc->log2_chroma_w;
1535         int vshift = desc->log2_chroma_h;
1536         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1537                                 s->h_edge_pos, s->v_edge_pos,
1538                                 EDGE_WIDTH, EDGE_WIDTH,
1539                                 EDGE_TOP | EDGE_BOTTOM);
1540         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1541                                 s->h_edge_pos >> hshift,
1542                                 s->v_edge_pos >> vshift,
1543                                 EDGE_WIDTH >> hshift,
1544                                 EDGE_WIDTH >> vshift,
1545                                 EDGE_TOP | EDGE_BOTTOM);
1546         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1547                                 s->h_edge_pos >> hshift,
1548                                 s->v_edge_pos >> vshift,
1549                                 EDGE_WIDTH >> hshift,
1550                                 EDGE_WIDTH >> vshift,
1551                                 EDGE_TOP | EDGE_BOTTOM);
1552     }
1553
1554     emms_c();
1555
1556     s->last_pict_type                 = s->pict_type;
1557     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1558     if (s->pict_type!= AV_PICTURE_TYPE_B)
1559         s->last_non_b_pict_type = s->pict_type;
1560
1561     if (s->encoding) {
1562         /* release non-reference frames */
1563         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1564             if (!s->picture[i].reference)
1565                 ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1566         }
1567     }
1568
1569 #if FF_API_CODED_FRAME
1570 FF_DISABLE_DEPRECATION_WARNINGS
1571     av_frame_copy_props(s->avctx->coded_frame, s->current_picture.f);
1572 FF_ENABLE_DEPRECATION_WARNINGS
1573 #endif
1574 #if FF_API_ERROR_FRAME
1575 FF_DISABLE_DEPRECATION_WARNINGS
1576     memcpy(s->current_picture.f->error, s->current_picture.encoding_error,
1577            sizeof(s->current_picture.encoding_error));
1578 FF_ENABLE_DEPRECATION_WARNINGS
1579 #endif
1580 }
1581
1582 static void update_noise_reduction(MpegEncContext *s)
1583 {
1584     int intra, i;
1585
1586     for (intra = 0; intra < 2; intra++) {
1587         if (s->dct_count[intra] > (1 << 16)) {
1588             for (i = 0; i < 64; i++) {
1589                 s->dct_error_sum[intra][i] >>= 1;
1590             }
1591             s->dct_count[intra] >>= 1;
1592         }
1593
1594         for (i = 0; i < 64; i++) {
1595             s->dct_offset[intra][i] = (s->noise_reduction *
1596                                        s->dct_count[intra] +
1597                                        s->dct_error_sum[intra][i] / 2) /
1598                                       (s->dct_error_sum[intra][i] + 1);
1599         }
1600     }
1601 }
1602
1603 static int frame_start(MpegEncContext *s)
1604 {
1605     int ret;
1606
1607     /* mark & release old frames */
1608     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1609         s->last_picture_ptr != s->next_picture_ptr &&
1610         s->last_picture_ptr->f->buf[0]) {
1611         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1612     }
1613
1614     s->current_picture_ptr->f->pict_type = s->pict_type;
1615     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1616
1617     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1618     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1619                                    s->current_picture_ptr)) < 0)
1620         return ret;
1621
1622     if (s->pict_type != AV_PICTURE_TYPE_B) {
1623         s->last_picture_ptr = s->next_picture_ptr;
1624         if (!s->droppable)
1625             s->next_picture_ptr = s->current_picture_ptr;
1626     }
1627
1628     if (s->last_picture_ptr) {
1629         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1630         if (s->last_picture_ptr->f->buf[0] &&
1631             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1632                                        s->last_picture_ptr)) < 0)
1633             return ret;
1634     }
1635     if (s->next_picture_ptr) {
1636         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1637         if (s->next_picture_ptr->f->buf[0] &&
1638             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1639                                        s->next_picture_ptr)) < 0)
1640             return ret;
1641     }
1642
1643     if (s->picture_structure!= PICT_FRAME) {
1644         int i;
1645         for (i = 0; i < 4; i++) {
1646             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1647                 s->current_picture.f->data[i] +=
1648                     s->current_picture.f->linesize[i];
1649             }
1650             s->current_picture.f->linesize[i] *= 2;
1651             s->last_picture.f->linesize[i]    *= 2;
1652             s->next_picture.f->linesize[i]    *= 2;
1653         }
1654     }
1655
1656     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1657         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1658         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1659     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1660         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1661         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1662     } else {
1663         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1664         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1665     }
1666
1667     if (s->dct_error_sum) {
1668         assert(s->noise_reduction && s->encoding);
1669         update_noise_reduction(s);
1670     }
1671
1672     return 0;
1673 }
1674
1675 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1676                           const AVFrame *pic_arg, int *got_packet)
1677 {
1678     MpegEncContext *s = avctx->priv_data;
1679     int i, stuffing_count, ret;
1680     int context_count = s->slice_context_count;
1681
1682     s->picture_in_gop_number++;
1683
1684     if (load_input_picture(s, pic_arg) < 0)
1685         return -1;
1686
1687     if (select_input_picture(s) < 0) {
1688         return -1;
1689     }
1690
1691     /* output? */
1692     if (s->new_picture.f->data[0]) {
1693         uint8_t *sd;
1694         if (!pkt->data &&
1695             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1696             return ret;
1697         if (s->mb_info) {
1698             s->mb_info_ptr = av_packet_new_side_data(pkt,
1699                                  AV_PKT_DATA_H263_MB_INFO,
1700                                  s->mb_width*s->mb_height*12);
1701             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1702         }
1703
1704         for (i = 0; i < context_count; i++) {
1705             int start_y = s->thread_context[i]->start_mb_y;
1706             int   end_y = s->thread_context[i]->  end_mb_y;
1707             int h       = s->mb_height;
1708             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1709             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1710
1711             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1712         }
1713
1714         s->pict_type = s->new_picture.f->pict_type;
1715         //emms_c();
1716         ret = frame_start(s);
1717         if (ret < 0)
1718             return ret;
1719 vbv_retry:
1720         if (encode_picture(s, s->picture_number) < 0)
1721             return -1;
1722
1723 #if FF_API_STAT_BITS
1724 FF_DISABLE_DEPRECATION_WARNINGS
1725         avctx->header_bits = s->header_bits;
1726         avctx->mv_bits     = s->mv_bits;
1727         avctx->misc_bits   = s->misc_bits;
1728         avctx->i_tex_bits  = s->i_tex_bits;
1729         avctx->p_tex_bits  = s->p_tex_bits;
1730         avctx->i_count     = s->i_count;
1731         // FIXME f/b_count in avctx
1732         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1733         avctx->skip_count  = s->skip_count;
1734 FF_ENABLE_DEPRECATION_WARNINGS
1735 #endif
1736
1737         frame_end(s);
1738
1739         sd = av_packet_new_side_data(pkt, AV_PKT_DATA_QUALITY_FACTOR,
1740                                      sizeof(int));
1741         if (!sd)
1742             return AVERROR(ENOMEM);
1743         *(int *)sd = s->current_picture.f->quality;
1744
1745         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1746             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1747
1748         if (avctx->rc_buffer_size) {
1749             RateControlContext *rcc = &s->rc_context;
1750             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1751
1752             if (put_bits_count(&s->pb) > max_size &&
1753                 s->lambda < s->lmax) {
1754                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1755                                        (s->qscale + 1) / s->qscale);
1756                 if (s->adaptive_quant) {
1757                     int i;
1758                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1759                         s->lambda_table[i] =
1760                             FFMAX(s->lambda_table[i] + 1,
1761                                   s->lambda_table[i] * (s->qscale + 1) /
1762                                   s->qscale);
1763                 }
1764                 s->mb_skipped = 0;        // done in frame_start()
1765                 // done in encode_picture() so we must undo it
1766                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1767                     if (s->flipflop_rounding          ||
1768                         s->codec_id == AV_CODEC_ID_H263P ||
1769                         s->codec_id == AV_CODEC_ID_MPEG4)
1770                         s->no_rounding ^= 1;
1771                 }
1772                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1773                     s->time_base       = s->last_time_base;
1774                     s->last_non_b_time = s->time - s->pp_time;
1775                 }
1776                 for (i = 0; i < context_count; i++) {
1777                     PutBitContext *pb = &s->thread_context[i]->pb;
1778                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1779                 }
1780                 goto vbv_retry;
1781             }
1782
1783             assert(s->avctx->rc_max_rate);
1784         }
1785
1786         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1787             ff_write_pass1_stats(s);
1788
1789         for (i = 0; i < 4; i++) {
1790             s->current_picture_ptr->encoding_error[i] = s->current_picture.encoding_error[i];
1791             avctx->error[i] += s->current_picture_ptr->encoding_error[i];
1792         }
1793
1794         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1795             assert(put_bits_count(&s->pb) == s->header_bits + s->mv_bits +
1796                                              s->misc_bits + s->i_tex_bits +
1797                                              s->p_tex_bits);
1798         flush_put_bits(&s->pb);
1799         s->frame_bits  = put_bits_count(&s->pb);
1800
1801         stuffing_count = ff_vbv_update(s, s->frame_bits);
1802         if (stuffing_count) {
1803             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1804                     stuffing_count + 50) {
1805                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1806                 return -1;
1807             }
1808
1809             switch (s->codec_id) {
1810             case AV_CODEC_ID_MPEG1VIDEO:
1811             case AV_CODEC_ID_MPEG2VIDEO:
1812                 while (stuffing_count--) {
1813                     put_bits(&s->pb, 8, 0);
1814                 }
1815             break;
1816             case AV_CODEC_ID_MPEG4:
1817                 put_bits(&s->pb, 16, 0);
1818                 put_bits(&s->pb, 16, 0x1C3);
1819                 stuffing_count -= 4;
1820                 while (stuffing_count--) {
1821                     put_bits(&s->pb, 8, 0xFF);
1822                 }
1823             break;
1824             default:
1825                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1826             }
1827             flush_put_bits(&s->pb);
1828             s->frame_bits  = put_bits_count(&s->pb);
1829         }
1830
1831         /* update mpeg1/2 vbv_delay for CBR */
1832         if (s->avctx->rc_max_rate                          &&
1833             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1834             s->out_format == FMT_MPEG1                     &&
1835             90000LL * (avctx->rc_buffer_size - 1) <=
1836                 s->avctx->rc_max_rate * 0xFFFFLL) {
1837             AVCPBProperties *props;
1838             size_t props_size;
1839
1840             int vbv_delay, min_delay;
1841             double inbits  = s->avctx->rc_max_rate *
1842                              av_q2d(s->avctx->time_base);
1843             int    minbits = s->frame_bits - 8 *
1844                              (s->vbv_delay_ptr - s->pb.buf - 1);
1845             double bits    = s->rc_context.buffer_index + minbits - inbits;
1846
1847             if (bits < 0)
1848                 av_log(s->avctx, AV_LOG_ERROR,
1849                        "Internal error, negative bits\n");
1850
1851             assert(s->repeat_first_field == 0);
1852
1853             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1854             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1855                         s->avctx->rc_max_rate;
1856
1857             vbv_delay = FFMAX(vbv_delay, min_delay);
1858
1859             assert(vbv_delay < 0xFFFF);
1860
1861             s->vbv_delay_ptr[0] &= 0xF8;
1862             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1863             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1864             s->vbv_delay_ptr[2] &= 0x07;
1865             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1866
1867             props = av_cpb_properties_alloc(&props_size);
1868             if (!props)
1869                 return AVERROR(ENOMEM);
1870             props->vbv_delay = vbv_delay * 300;
1871
1872             ret = av_packet_add_side_data(pkt, AV_PKT_DATA_CPB_PROPERTIES,
1873                                           (uint8_t*)props, props_size);
1874             if (ret < 0) {
1875                 av_freep(&props);
1876                 return ret;
1877             }
1878
1879 #if FF_API_VBV_DELAY
1880 FF_DISABLE_DEPRECATION_WARNINGS
1881             avctx->vbv_delay     = vbv_delay * 300;
1882 FF_ENABLE_DEPRECATION_WARNINGS
1883 #endif
1884         }
1885         s->total_bits     += s->frame_bits;
1886 #if FF_API_STAT_BITS
1887 FF_DISABLE_DEPRECATION_WARNINGS
1888         avctx->frame_bits  = s->frame_bits;
1889 FF_ENABLE_DEPRECATION_WARNINGS
1890 #endif
1891
1892
1893         pkt->pts = s->current_picture.f->pts;
1894         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1895             if (!s->current_picture.f->coded_picture_number)
1896                 pkt->dts = pkt->pts - s->dts_delta;
1897             else
1898                 pkt->dts = s->reordered_pts;
1899             s->reordered_pts = pkt->pts;
1900         } else
1901             pkt->dts = pkt->pts;
1902         if (s->current_picture.f->key_frame)
1903             pkt->flags |= AV_PKT_FLAG_KEY;
1904         if (s->mb_info)
1905             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1906     } else {
1907         s->frame_bits = 0;
1908     }
1909     assert((s->frame_bits & 7) == 0);
1910
1911     pkt->size = s->frame_bits / 8;
1912     *got_packet = !!pkt->size;
1913     return 0;
1914 }
1915
1916 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1917                                                 int n, int threshold)
1918 {
1919     static const char tab[64] = {
1920         3, 2, 2, 1, 1, 1, 1, 1,
1921         1, 1, 1, 1, 1, 1, 1, 1,
1922         1, 1, 1, 1, 1, 1, 1, 1,
1923         0, 0, 0, 0, 0, 0, 0, 0,
1924         0, 0, 0, 0, 0, 0, 0, 0,
1925         0, 0, 0, 0, 0, 0, 0, 0,
1926         0, 0, 0, 0, 0, 0, 0, 0,
1927         0, 0, 0, 0, 0, 0, 0, 0
1928     };
1929     int score = 0;
1930     int run = 0;
1931     int i;
1932     int16_t *block = s->block[n];
1933     const int last_index = s->block_last_index[n];
1934     int skip_dc;
1935
1936     if (threshold < 0) {
1937         skip_dc = 0;
1938         threshold = -threshold;
1939     } else
1940         skip_dc = 1;
1941
1942     /* Are all we could set to zero already zero? */
1943     if (last_index <= skip_dc - 1)
1944         return;
1945
1946     for (i = 0; i <= last_index; i++) {
1947         const int j = s->intra_scantable.permutated[i];
1948         const int level = FFABS(block[j]);
1949         if (level == 1) {
1950             if (skip_dc && i == 0)
1951                 continue;
1952             score += tab[run];
1953             run = 0;
1954         } else if (level > 1) {
1955             return;
1956         } else {
1957             run++;
1958         }
1959     }
1960     if (score >= threshold)
1961         return;
1962     for (i = skip_dc; i <= last_index; i++) {
1963         const int j = s->intra_scantable.permutated[i];
1964         block[j] = 0;
1965     }
1966     if (block[0])
1967         s->block_last_index[n] = 0;
1968     else
1969         s->block_last_index[n] = -1;
1970 }
1971
1972 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1973                                int last_index)
1974 {
1975     int i;
1976     const int maxlevel = s->max_qcoeff;
1977     const int minlevel = s->min_qcoeff;
1978     int overflow = 0;
1979
1980     if (s->mb_intra) {
1981         i = 1; // skip clipping of intra dc
1982     } else
1983         i = 0;
1984
1985     for (; i <= last_index; i++) {
1986         const int j = s->intra_scantable.permutated[i];
1987         int level = block[j];
1988
1989         if (level > maxlevel) {
1990             level = maxlevel;
1991             overflow++;
1992         } else if (level < minlevel) {
1993             level = minlevel;
1994             overflow++;
1995         }
1996
1997         block[j] = level;
1998     }
1999
2000     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2001         av_log(s->avctx, AV_LOG_INFO,
2002                "warning, clipping %d dct coefficients to %d..%d\n",
2003                overflow, minlevel, maxlevel);
2004 }
2005
/**
 * Compute a weight for each pixel of an 8x8 block from the local activity
 * around it.
 *
 * For every pixel the neighbourhood of up to 3x3 pixels (cut off at the
 * block border) is examined and the weight is set to
 * 36 * sqrt(count * sum(v^2) - sum(v)^2) / count.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int idx;

    // FIXME optimize
    for (idx = 0; idx < 64; idx++) {
        const int col   = idx & 7;
        const int row   = idx >> 3;
        const int left  = FFMAX(col - 1, 0);
        const int right = FFMIN(8, col + 2);
        const int top   = FFMAX(row - 1, 0);
        const int bot   = FFMIN(8, row + 2);
        const int count = (right - left) * (bot - top);
        int sum = 0;
        int sqr = 0;
        int nx, ny;

        for (ny = top; ny < bot; ny++) {
            for (nx = left; nx < right; nx++) {
                const int v = ptr[nx + ny * stride];

                sum += v;
                sqr += v * v;
            }
        }
        weight[idx] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
    }
}
2029
2030 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2031                                                 int motion_x, int motion_y,
2032                                                 int mb_block_height,
2033                                                 int mb_block_count)
2034 {
2035     int16_t weight[8][64];
2036     int16_t orig[8][64];
2037     const int mb_x = s->mb_x;
2038     const int mb_y = s->mb_y;
2039     int i;
2040     int skip_dct[8];
2041     int dct_offset = s->linesize * 8; // default for progressive frames
2042     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2043     ptrdiff_t wrap_y, wrap_c;
2044
2045     for (i = 0; i < mb_block_count; i++)
2046         skip_dct[i] = s->skipdct;
2047
2048     if (s->adaptive_quant) {
2049         const int last_qp = s->qscale;
2050         const int mb_xy = mb_x + mb_y * s->mb_stride;
2051
2052         s->lambda = s->lambda_table[mb_xy];
2053         update_qscale(s);
2054
2055         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2056             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2057             s->dquant = s->qscale - last_qp;
2058
2059             if (s->out_format == FMT_H263) {
2060                 s->dquant = av_clip(s->dquant, -2, 2);
2061
2062                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2063                     if (!s->mb_intra) {
2064                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2065                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2066                                 s->dquant = 0;
2067                         }
2068                         if (s->mv_type == MV_TYPE_8X8)
2069                             s->dquant = 0;
2070                     }
2071                 }
2072             }
2073         }
2074         ff_set_qscale(s, last_qp + s->dquant);
2075     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2076         ff_set_qscale(s, s->qscale + s->dquant);
2077
2078     wrap_y = s->linesize;
2079     wrap_c = s->uvlinesize;
2080     ptr_y  = s->new_picture.f->data[0] +
2081              (mb_y * 16 * wrap_y)              + mb_x * 16;
2082     ptr_cb = s->new_picture.f->data[1] +
2083              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
2084     ptr_cr = s->new_picture.f->data[2] +
2085              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
2086
2087     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
2088         uint8_t *ebuf = s->sc.edge_emu_buffer + 32;
2089         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2090                                  wrap_y, wrap_y,
2091                                  16, 16, mb_x * 16, mb_y * 16,
2092                                  s->width, s->height);
2093         ptr_y = ebuf;
2094         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
2095                                  wrap_c, wrap_c,
2096                                  8, mb_block_height, mb_x * 8, mb_y * 8,
2097                                  s->width >> 1, s->height >> 1);
2098         ptr_cb = ebuf + 18 * wrap_y;
2099         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
2100                                  wrap_c, wrap_c,
2101                                  8, mb_block_height, mb_x * 8, mb_y * 8,
2102                                  s->width >> 1, s->height >> 1);
2103         ptr_cr = ebuf + 18 * wrap_y + 8;
2104     }
2105
2106     if (s->mb_intra) {
2107         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2108             int progressive_score, interlaced_score;
2109
2110             s->interlaced_dct = 0;
2111             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2112                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2113                                                      NULL, wrap_y, 8) - 400;
2114
2115             if (progressive_score > 0) {
2116                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2117                                                         NULL, wrap_y * 2, 8) +
2118                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2119                                                         NULL, wrap_y * 2, 8);
2120                 if (progressive_score > interlaced_score) {
2121                     s->interlaced_dct = 1;
2122
2123                     dct_offset = wrap_y;
2124                     wrap_y <<= 1;
2125                     if (s->chroma_format == CHROMA_422)
2126                         wrap_c <<= 1;
2127                 }
2128             }
2129         }
2130
2131         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2132         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2133         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2134         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2135
2136         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2137             skip_dct[4] = 1;
2138             skip_dct[5] = 1;
2139         } else {
2140             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2141             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2142             if (!s->chroma_y_shift) { /* 422 */
2143                 s->pdsp.get_pixels(s->block[6],
2144                                    ptr_cb + (dct_offset >> 1), wrap_c);
2145                 s->pdsp.get_pixels(s->block[7],
2146                                    ptr_cr + (dct_offset >> 1), wrap_c);
2147             }
2148         }
2149     } else {
2150         op_pixels_func (*op_pix)[4];
2151         qpel_mc_func (*op_qpix)[16];
2152         uint8_t *dest_y, *dest_cb, *dest_cr;
2153
2154         dest_y  = s->dest[0];
2155         dest_cb = s->dest[1];
2156         dest_cr = s->dest[2];
2157
2158         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2159             op_pix  = s->hdsp.put_pixels_tab;
2160             op_qpix = s->qdsp.put_qpel_pixels_tab;
2161         } else {
2162             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2163             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2164         }
2165
2166         if (s->mv_dir & MV_DIR_FORWARD) {
2167             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2168                           s->last_picture.f->data,
2169                           op_pix, op_qpix);
2170             op_pix  = s->hdsp.avg_pixels_tab;
2171             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2172         }
2173         if (s->mv_dir & MV_DIR_BACKWARD) {
2174             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2175                           s->next_picture.f->data,
2176                           op_pix, op_qpix);
2177         }
2178
2179         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2180             int progressive_score, interlaced_score;
2181
2182             s->interlaced_dct = 0;
2183             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2184                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2185                                                      ptr_y + wrap_y * 8,
2186                                                      wrap_y, 8) - 400;
2187
2188             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2189                 progressive_score -= 400;
2190
2191             if (progressive_score > 0) {
2192                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2193                                                         wrap_y * 2, 8) +
2194                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2195                                                         ptr_y + wrap_y,
2196                                                         wrap_y * 2, 8);
2197
2198                 if (progressive_score > interlaced_score) {
2199                     s->interlaced_dct = 1;
2200
2201                     dct_offset = wrap_y;
2202                     wrap_y <<= 1;
2203                     if (s->chroma_format == CHROMA_422)
2204                         wrap_c <<= 1;
2205                 }
2206             }
2207         }
2208
2209         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2210         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2211         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2212                             dest_y + dct_offset, wrap_y);
2213         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2214                             dest_y + dct_offset + 8, wrap_y);
2215
2216         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2217             skip_dct[4] = 1;
2218             skip_dct[5] = 1;
2219         } else {
2220             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2221             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2222             if (!s->chroma_y_shift) { /* 422 */
2223                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2224                                     dest_cb + (dct_offset >> 1), wrap_c);
2225                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2226                                     dest_cr + (dct_offset >> 1), wrap_c);
2227             }
2228         }
2229         /* pre quantization */
2230         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2231                 2 * s->qscale * s->qscale) {
2232             // FIXME optimize
2233             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2234                 skip_dct[0] = 1;
2235             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2236                 skip_dct[1] = 1;
2237             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2238                                wrap_y, 8) < 20 * s->qscale)
2239                 skip_dct[2] = 1;
2240             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2241                                wrap_y, 8) < 20 * s->qscale)
2242                 skip_dct[3] = 1;
2243             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2244                 skip_dct[4] = 1;
2245             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2246                 skip_dct[5] = 1;
2247             if (!s->chroma_y_shift) { /* 422 */
2248                 if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2249                                    dest_cb + (dct_offset >> 1),
2250                                    wrap_c, 8) < 20 * s->qscale)
2251                     skip_dct[6] = 1;
2252                 if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2253                                    dest_cr + (dct_offset >> 1),
2254                                    wrap_c, 8) < 20 * s->qscale)
2255                     skip_dct[7] = 1;
2256             }
2257         }
2258     }
2259
2260     if (s->quantizer_noise_shaping) {
2261         if (!skip_dct[0])
2262             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2263         if (!skip_dct[1])
2264             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2265         if (!skip_dct[2])
2266             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2267         if (!skip_dct[3])
2268             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2269         if (!skip_dct[4])
2270             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2271         if (!skip_dct[5])
2272             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2273         if (!s->chroma_y_shift) { /* 422 */
2274             if (!skip_dct[6])
2275                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2276                                   wrap_c);
2277             if (!skip_dct[7])
2278                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2279                                   wrap_c);
2280         }
2281         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2282     }
2283
2284     /* DCT & quantize */
2285     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2286     {
2287         for (i = 0; i < mb_block_count; i++) {
2288             if (!skip_dct[i]) {
2289                 int overflow;
2290                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2291                 // FIXME we could decide to change to quantizer instead of
2292                 // clipping
2293                 // JS: I don't think that would be a good idea it could lower
2294                 //     quality instead of improve it. Just INTRADC clipping
2295                 //     deserves changes in quantizer
2296                 if (overflow)
2297                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2298             } else
2299                 s->block_last_index[i] = -1;
2300         }
2301         if (s->quantizer_noise_shaping) {
2302             for (i = 0; i < mb_block_count; i++) {
2303                 if (!skip_dct[i]) {
2304                     s->block_last_index[i] =
2305                         dct_quantize_refine(s, s->block[i], weight[i],
2306                                             orig[i], i, s->qscale);
2307                 }
2308             }
2309         }
2310
2311         if (s->luma_elim_threshold && !s->mb_intra)
2312             for (i = 0; i < 4; i++)
2313                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2314         if (s->chroma_elim_threshold && !s->mb_intra)
2315             for (i = 4; i < mb_block_count; i++)
2316                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2317
2318         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2319             for (i = 0; i < mb_block_count; i++) {
2320                 if (s->block_last_index[i] == -1)
2321                     s->coded_score[i] = INT_MAX / 256;
2322             }
2323         }
2324     }
2325
2326     if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2327         s->block_last_index[4] =
2328         s->block_last_index[5] = 0;
2329         s->block[4][0] =
2330         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2331     }
2332
2333     // non c quantize code returns incorrect block_last_index FIXME
2334     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2335         for (i = 0; i < mb_block_count; i++) {
2336             int j;
2337             if (s->block_last_index[i] > 0) {
2338                 for (j = 63; j > 0; j--) {
2339                     if (s->block[i][s->intra_scantable.permutated[j]])
2340                         break;
2341                 }
2342                 s->block_last_index[i] = j;
2343             }
2344         }
2345     }
2346
2347     /* huffman encode */
2348     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2349     case AV_CODEC_ID_MPEG1VIDEO:
2350     case AV_CODEC_ID_MPEG2VIDEO:
2351         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2352             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2353         break;
2354     case AV_CODEC_ID_MPEG4:
2355         if (CONFIG_MPEG4_ENCODER)
2356             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2357         break;
2358     case AV_CODEC_ID_MSMPEG4V2:
2359     case AV_CODEC_ID_MSMPEG4V3:
2360     case AV_CODEC_ID_WMV1:
2361         if (CONFIG_MSMPEG4_ENCODER)
2362             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2363         break;
2364     case AV_CODEC_ID_WMV2:
2365         if (CONFIG_WMV2_ENCODER)
2366             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2367         break;
2368     case AV_CODEC_ID_H261:
2369         if (CONFIG_H261_ENCODER)
2370             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2371         break;
2372     case AV_CODEC_ID_H263:
2373     case AV_CODEC_ID_H263P:
2374     case AV_CODEC_ID_FLV1:
2375     case AV_CODEC_ID_RV10:
2376     case AV_CODEC_ID_RV20:
2377         if (CONFIG_H263_ENCODER)
2378             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2379         break;
2380     case AV_CODEC_ID_MJPEG:
2381         if (CONFIG_MJPEG_ENCODER)
2382             ff_mjpeg_encode_mb(s, s->block);
2383         break;
2384     default:
2385         assert(0);
2386     }
2387 }
2388
2389 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2390 {
2391     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2392     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2393 }
2394
2395 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2396     int i;
2397
2398     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2399
2400     /* mpeg1 */
2401     d->mb_skip_run= s->mb_skip_run;
2402     for(i=0; i<3; i++)
2403         d->last_dc[i] = s->last_dc[i];
2404
2405     /* statistics */
2406     d->mv_bits= s->mv_bits;
2407     d->i_tex_bits= s->i_tex_bits;
2408     d->p_tex_bits= s->p_tex_bits;
2409     d->i_count= s->i_count;
2410     d->f_count= s->f_count;
2411     d->b_count= s->b_count;
2412     d->skip_count= s->skip_count;
2413     d->misc_bits= s->misc_bits;
2414     d->last_bits= 0;
2415
2416     d->mb_skipped= 0;
2417     d->qscale= s->qscale;
2418     d->dquant= s->dquant;
2419
2420     d->esc3_level_length= s->esc3_level_length;
2421 }
2422
2423 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2424     int i;
2425
2426     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2427     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2428
2429     /* mpeg1 */
2430     d->mb_skip_run= s->mb_skip_run;
2431     for(i=0; i<3; i++)
2432         d->last_dc[i] = s->last_dc[i];
2433
2434     /* statistics */
2435     d->mv_bits= s->mv_bits;
2436     d->i_tex_bits= s->i_tex_bits;
2437     d->p_tex_bits= s->p_tex_bits;
2438     d->i_count= s->i_count;
2439     d->f_count= s->f_count;
2440     d->b_count= s->b_count;
2441     d->skip_count= s->skip_count;
2442     d->misc_bits= s->misc_bits;
2443
2444     d->mb_intra= s->mb_intra;
2445     d->mb_skipped= s->mb_skipped;
2446     d->mv_type= s->mv_type;
2447     d->mv_dir= s->mv_dir;
2448     d->pb= s->pb;
2449     if(s->data_partitioning){
2450         d->pb2= s->pb2;
2451         d->tex_pb= s->tex_pb;
2452     }
2453     d->block= s->block;
2454     for(i=0; i<8; i++)
2455         d->block_last_index[i]= s->block_last_index[i];
2456     d->interlaced_dct= s->interlaced_dct;
2457     d->qscale= s->qscale;
2458
2459     d->esc3_level_length= s->esc3_level_length;
2460 }
2461
2462 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2463                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2464                            int *dmin, int *next_block, int motion_x, int motion_y)
2465 {
2466     int score;
2467     uint8_t *dest_backup[3];
2468
2469     copy_context_before_encode(s, backup, type);
2470
2471     s->block= s->blocks[*next_block];
2472     s->pb= pb[*next_block];
2473     if(s->data_partitioning){
2474         s->pb2   = pb2   [*next_block];
2475         s->tex_pb= tex_pb[*next_block];
2476     }
2477
2478     if(*next_block){
2479         memcpy(dest_backup, s->dest, sizeof(s->dest));
2480         s->dest[0] = s->sc.rd_scratchpad;
2481         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2482         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2483         assert(s->linesize >= 32); //FIXME
2484     }
2485
2486     encode_mb(s, motion_x, motion_y);
2487
2488     score= put_bits_count(&s->pb);
2489     if(s->data_partitioning){
2490         score+= put_bits_count(&s->pb2);
2491         score+= put_bits_count(&s->tex_pb);
2492     }
2493
2494     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2495         ff_mpv_decode_mb(s, s->block);
2496
2497         score *= s->lambda2;
2498         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2499     }
2500
2501     if(*next_block){
2502         memcpy(s->dest, dest_backup, sizeof(s->dest));
2503     }
2504
2505     if(score<*dmin){
2506         *dmin= score;
2507         *next_block^=1;
2508
2509         copy_context_after_encode(best, s, type);
2510     }
2511 }
2512
2513 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2514     uint32_t *sq = ff_square_tab + 256;
2515     int acc=0;
2516     int x,y;
2517
2518     if(w==16 && h==16)
2519         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2520     else if(w==8 && h==8)
2521         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2522
2523     for(y=0; y<h; y++){
2524         for(x=0; x<w; x++){
2525             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2526         }
2527     }
2528
2529     assert(acc>=0);
2530
2531     return acc;
2532 }
2533
2534 static int sse_mb(MpegEncContext *s){
2535     int w= 16;
2536     int h= 16;
2537
2538     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2539     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2540
2541     if(w==16 && h==16)
2542       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2543         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2544                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2545                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2546       }else{
2547         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2548                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2549                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2550       }
2551     else
2552         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2553                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2554                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2555 }
2556
2557 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2558     MpegEncContext *s= *(void**)arg;
2559
2560
2561     s->me.pre_pass=1;
2562     s->me.dia_size= s->avctx->pre_dia_size;
2563     s->first_slice_line=1;
2564     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2565         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2566             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2567         }
2568         s->first_slice_line=0;
2569     }
2570
2571     s->me.pre_pass=0;
2572
2573     return 0;
2574 }
2575
2576 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2577     MpegEncContext *s= *(void**)arg;
2578
2579     s->me.dia_size= s->avctx->dia_size;
2580     s->first_slice_line=1;
2581     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2582         s->mb_x=0; //for block init below
2583         ff_init_block_index(s);
2584         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2585             s->block_index[0]+=2;
2586             s->block_index[1]+=2;
2587             s->block_index[2]+=2;
2588             s->block_index[3]+=2;
2589
2590             /* compute motion vector & mb_type and store in context */
2591             if(s->pict_type==AV_PICTURE_TYPE_B)
2592                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2593             else
2594                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2595         }
2596         s->first_slice_line=0;
2597     }
2598     return 0;
2599 }
2600
2601 static int mb_var_thread(AVCodecContext *c, void *arg){
2602     MpegEncContext *s= *(void**)arg;
2603     int mb_x, mb_y;
2604
2605     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2606         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2607             int xx = mb_x * 16;
2608             int yy = mb_y * 16;
2609             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2610             int varc;
2611             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2612
2613             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2614                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2615
2616             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2617             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2618             s->me.mb_var_sum_temp    += varc;
2619         }
2620     }
2621     return 0;
2622 }
2623
2624 static void write_slice_end(MpegEncContext *s){
2625     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2626         if(s->partitioned_frame){
2627             ff_mpeg4_merge_partitions(s);
2628         }
2629
2630         ff_mpeg4_stuffing(&s->pb);
2631     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2632         ff_mjpeg_encode_stuffing(&s->pb);
2633     }
2634
2635     avpriv_align_put_bits(&s->pb);
2636     flush_put_bits(&s->pb);
2637
2638     if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2639         s->misc_bits+= get_bits_diff(s);
2640 }
2641
2642 static void write_mb_info(MpegEncContext *s)
2643 {
2644     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2645     int offset = put_bits_count(&s->pb);
2646     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2647     int gobn = s->mb_y / s->gob_index;
2648     int pred_x, pred_y;
2649     if (CONFIG_H263_ENCODER)
2650         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2651     bytestream_put_le32(&ptr, offset);
2652     bytestream_put_byte(&ptr, s->qscale);
2653     bytestream_put_byte(&ptr, gobn);
2654     bytestream_put_le16(&ptr, mba);
2655     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2656     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2657     /* 4MV not implemented */
2658     bytestream_put_byte(&ptr, 0); /* hmv2 */
2659     bytestream_put_byte(&ptr, 0); /* vmv2 */
2660 }
2661
2662 static void update_mb_info(MpegEncContext *s, int startcode)
2663 {
2664     if (!s->mb_info)
2665         return;
2666     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2667         s->mb_info_size += 12;
2668         s->prev_mb_info = s->last_mb_info;
2669     }
2670     if (startcode) {
2671         s->prev_mb_info = put_bits_count(&s->pb)/8;
2672         /* This might have incremented mb_info_size above, and we return without
2673          * actually writing any info into that slot yet. But in that case,
2674          * this will be called again at the start of the after writing the
2675          * start code, actually writing the mb info. */
2676         return;
2677     }
2678
2679     s->last_mb_info = put_bits_count(&s->pb)/8;
2680     if (!s->mb_info_size)
2681         s->mb_info_size += 12;
2682     write_mb_info(s);
2683 }
2684
2685 static int encode_thread(AVCodecContext *c, void *arg){
2686     MpegEncContext *s= *(void**)arg;
2687     int mb_x, mb_y, pdif = 0;
2688     int chr_h= 16>>s->chroma_y_shift;
2689     int i, j;
2690     MpegEncContext best_s = { 0 }, backup_s;
2691     uint8_t bit_buf[2][MAX_MB_BYTES];
2692     uint8_t bit_buf2[2][MAX_MB_BYTES];
2693     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2694     PutBitContext pb[2], pb2[2], tex_pb[2];
2695
2696     for(i=0; i<2; i++){
2697         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2698         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2699         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2700     }
2701
2702     s->last_bits= put_bits_count(&s->pb);
2703     s->mv_bits=0;
2704     s->misc_bits=0;
2705     s->i_tex_bits=0;
2706     s->p_tex_bits=0;
2707     s->i_count=0;
2708     s->f_count=0;
2709     s->b_count=0;
2710     s->skip_count=0;
2711
2712     for(i=0; i<3; i++){
2713         /* init last dc values */
2714         /* note: quant matrix value (8) is implied here */
2715         s->last_dc[i] = 128 << s->intra_dc_precision;
2716
2717         s->current_picture.encoding_error[i] = 0;
2718     }
2719     s->mb_skip_run = 0;
2720     memset(s->last_mv, 0, sizeof(s->last_mv));
2721
2722     s->last_mv_dir = 0;
2723
2724     switch(s->codec_id){
2725     case AV_CODEC_ID_H263:
2726     case AV_CODEC_ID_H263P:
2727     case AV_CODEC_ID_FLV1:
2728         if (CONFIG_H263_ENCODER)
2729             s->gob_index = H263_GOB_HEIGHT(s->height);
2730         break;
2731     case AV_CODEC_ID_MPEG4:
2732         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2733             ff_mpeg4_init_partitions(s);
2734         break;
2735     }
2736
2737     s->resync_mb_x=0;
2738     s->resync_mb_y=0;
2739     s->first_slice_line = 1;
2740     s->ptr_lastgob = s->pb.buf;
2741     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2742         s->mb_x=0;
2743         s->mb_y= mb_y;
2744
2745         ff_set_qscale(s, s->qscale);
2746         ff_init_block_index(s);
2747
2748         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2749             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2750             int mb_type= s->mb_type[xy];
2751 //            int d;
2752             int dmin= INT_MAX;
2753             int dir;
2754
2755             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2756                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2757                 return -1;
2758             }
2759             if(s->data_partitioning){
2760                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2761                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2762                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2763                     return -1;
2764                 }
2765             }
2766
2767             s->mb_x = mb_x;
2768             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2769             ff_update_block_index(s);
2770
2771             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2772                 ff_h261_reorder_mb_index(s);
2773                 xy= s->mb_y*s->mb_stride + s->mb_x;
2774                 mb_type= s->mb_type[xy];
2775             }
2776
2777             /* write gob / video packet header  */
2778             if(s->rtp_mode){
2779                 int current_packet_size, is_gob_start;
2780
2781                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2782
2783                 is_gob_start = s->rtp_payload_size &&
2784                                current_packet_size >= s->rtp_payload_size &&
2785                                mb_y + mb_x > 0;
2786
2787                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2788
2789                 switch(s->codec_id){
2790                 case AV_CODEC_ID_H263:
2791                 case AV_CODEC_ID_H263P:
2792                     if(!s->h263_slice_structured)
2793                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2794                     break;
2795                 case AV_CODEC_ID_MPEG2VIDEO:
2796                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2797                 case AV_CODEC_ID_MPEG1VIDEO:
2798                     if(s->mb_skip_run) is_gob_start=0;
2799                     break;
2800                 }
2801
2802                 if(is_gob_start){
2803                     if(s->start_mb_y != mb_y || mb_x!=0){
2804                         write_slice_end(s);
2805
2806                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2807                             ff_mpeg4_init_partitions(s);
2808                         }
2809                     }
2810
2811                     assert((put_bits_count(&s->pb)&7) == 0);
2812                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2813
2814                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2815                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2816                         int d = 100 / s->error_rate;
2817                         if(r % d == 0){
2818                             current_packet_size=0;
2819                             s->pb.buf_ptr= s->ptr_lastgob;
2820                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2821                         }
2822                     }
2823
2824 #if FF_API_RTP_CALLBACK
2825 FF_DISABLE_DEPRECATION_WARNINGS
2826                     if (s->avctx->rtp_callback){
2827                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2828                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2829                     }
2830 FF_ENABLE_DEPRECATION_WARNINGS
2831 #endif
2832                     update_mb_info(s, 1);
2833
2834                     switch(s->codec_id){
2835                     case AV_CODEC_ID_MPEG4:
2836                         if (CONFIG_MPEG4_ENCODER) {
2837                             ff_mpeg4_encode_video_packet_header(s);
2838                             ff_mpeg4_clean_buffers(s);
2839                         }
2840                     break;
2841                     case AV_CODEC_ID_MPEG1VIDEO:
2842                     case AV_CODEC_ID_MPEG2VIDEO:
2843                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2844                             ff_mpeg1_encode_slice_header(s);
2845                             ff_mpeg1_clean_buffers(s);
2846                         }
2847                     break;
2848                     case AV_CODEC_ID_H263:
2849                     case AV_CODEC_ID_H263P:
2850                         if (CONFIG_H263_ENCODER)
2851                             ff_h263_encode_gob_header(s, mb_y);
2852                     break;
2853                     }
2854
2855                     if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
2856                         int bits= put_bits_count(&s->pb);
2857                         s->misc_bits+= bits - s->last_bits;
2858                         s->last_bits= bits;
2859                     }
2860
2861                     s->ptr_lastgob += current_packet_size;
2862                     s->first_slice_line=1;
2863                     s->resync_mb_x=mb_x;
2864                     s->resync_mb_y=mb_y;
2865                 }
2866             }
2867
2868             if(  (s->resync_mb_x   == s->mb_x)
2869                && s->resync_mb_y+1 == s->mb_y){
2870                 s->first_slice_line=0;
2871             }
2872
2873             s->mb_skipped=0;
2874             s->dquant=0; //only for QP_RD
2875
2876             update_mb_info(s, 0);
2877
2878             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2879                 int next_block=0;
2880                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2881
2882                 copy_context_before_encode(&backup_s, s, -1);
2883                 backup_s.pb= s->pb;
2884                 best_s.data_partitioning= s->data_partitioning;
2885                 best_s.partitioned_frame= s->partitioned_frame;
2886                 if(s->data_partitioning){
2887                     backup_s.pb2= s->pb2;
2888                     backup_s.tex_pb= s->tex_pb;
2889                 }
2890
2891                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2892                     s->mv_dir = MV_DIR_FORWARD;
2893                     s->mv_type = MV_TYPE_16X16;
2894                     s->mb_intra= 0;
2895                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2896                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2897                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2898                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2899                 }
2900                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2901                     s->mv_dir = MV_DIR_FORWARD;
2902                     s->mv_type = MV_TYPE_FIELD;
2903                     s->mb_intra= 0;
2904                     for(i=0; i<2; i++){
2905                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2906                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2907                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2908                     }
2909                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2910                                  &dmin, &next_block, 0, 0);
2911                 }
2912                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2913                     s->mv_dir = MV_DIR_FORWARD;
2914                     s->mv_type = MV_TYPE_16X16;
2915                     s->mb_intra= 0;
2916                     s->mv[0][0][0] = 0;
2917                     s->mv[0][0][1] = 0;
2918                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2919                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2920                 }
2921                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2922                     s->mv_dir = MV_DIR_FORWARD;
2923                     s->mv_type = MV_TYPE_8X8;
2924                     s->mb_intra= 0;
2925                     for(i=0; i<4; i++){
2926                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2927                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2928                     }
2929                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2930                                  &dmin, &next_block, 0, 0);
2931                 }
2932                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2933                     s->mv_dir = MV_DIR_FORWARD;
2934                     s->mv_type = MV_TYPE_16X16;
2935                     s->mb_intra= 0;
2936                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2937                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2938                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2939                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2940                 }
2941                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2942                     s->mv_dir = MV_DIR_BACKWARD;
2943                     s->mv_type = MV_TYPE_16X16;
2944                     s->mb_intra= 0;
2945                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2946                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2947                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2948                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2949                 }
2950                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2951                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2952                     s->mv_type = MV_TYPE_16X16;
2953                     s->mb_intra= 0;
2954                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2955                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2956                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2957                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2958                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2959                                  &dmin, &next_block, 0, 0);
2960                 }
2961                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2962                     s->mv_dir = MV_DIR_FORWARD;
2963                     s->mv_type = MV_TYPE_FIELD;
2964                     s->mb_intra= 0;
2965                     for(i=0; i<2; i++){
2966                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2967                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2968                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2969                     }
2970                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2971                                  &dmin, &next_block, 0, 0);
2972                 }
2973                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2974                     s->mv_dir = MV_DIR_BACKWARD;
2975                     s->mv_type = MV_TYPE_FIELD;
2976                     s->mb_intra= 0;
2977                     for(i=0; i<2; i++){
2978                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2979                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2980                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2981                     }
2982                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2983                                  &dmin, &next_block, 0, 0);
2984                 }
2985                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2986                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2987                     s->mv_type = MV_TYPE_FIELD;
2988                     s->mb_intra= 0;
2989                     for(dir=0; dir<2; dir++){
2990                         for(i=0; i<2; i++){
2991                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2992                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2993                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2994                         }
2995                     }
2996                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2997                                  &dmin, &next_block, 0, 0);
2998                 }
2999                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3000                     s->mv_dir = 0;
3001                     s->mv_type = MV_TYPE_16X16;
3002                     s->mb_intra= 1;
3003                     s->mv[0][0][0] = 0;
3004                     s->mv[0][0][1] = 0;
3005                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3006                                  &dmin, &next_block, 0, 0);
3007                     if(s->h263_pred || s->h263_aic){
3008                         if(best_s.mb_intra)
3009                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3010                         else
3011                             ff_clean_intra_table_entries(s); //old mode?
3012                     }
3013                 }
3014
3015                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3016                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3017                         const int last_qp= backup_s.qscale;
3018                         int qpi, qp, dc[6];
3019                         int16_t ac[6][16];
3020                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3021                         static const int dquant_tab[4]={-1,1,-2,2};
3022
3023                         assert(backup_s.dquant == 0);
3024
3025                         //FIXME intra
3026                         s->mv_dir= best_s.mv_dir;
3027                         s->mv_type = MV_TYPE_16X16;
3028                         s->mb_intra= best_s.mb_intra;
3029                         s->mv[0][0][0] = best_s.mv[0][0][0];
3030                         s->mv[0][0][1] = best_s.mv[0][0][1];
3031                         s->mv[1][0][0] = best_s.mv[1][0][0];
3032                         s->mv[1][0][1] = best_s.mv[1][0][1];
3033
3034                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3035                         for(; qpi<4; qpi++){
3036                             int dquant= dquant_tab[qpi];
3037                             qp= last_qp + dquant;
3038                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3039                                 continue;
3040                             backup_s.dquant= dquant;
3041                             if(s->mb_intra && s->dc_val[0]){
3042                                 for(i=0; i<6; i++){
3043                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3044                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3045                                 }
3046                             }
3047
3048                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3049                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3050                             if(best_s.qscale != qp){
3051                                 if(s->mb_intra && s->dc_val[0]){
3052                                     for(i=0; i<6; i++){
3053                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3054                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3055                                     }
3056                                 }
3057                             }
3058                         }
3059                     }
3060                 }
3061                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3062                     int mx= s->b_direct_mv_table[xy][0];
3063                     int my= s->b_direct_mv_table[xy][1];
3064
3065                     backup_s.dquant = 0;
3066                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3067                     s->mb_intra= 0;
3068                     ff_mpeg4_set_direct_mv(s, mx, my);
3069                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3070                                  &dmin, &next_block, mx, my);
3071                 }
3072                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3073                     backup_s.dquant = 0;
3074                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3075                     s->mb_intra= 0;
3076                     ff_mpeg4_set_direct_mv(s, 0, 0);
3077                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3078                                  &dmin, &next_block, 0, 0);
3079                 }
3080                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3081                     int coded=0;
3082                     for(i=0; i<6; i++)
3083                         coded |= s->block_last_index[i];
3084                     if(coded){
3085                         int mx,my;
3086                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3087                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3088                             mx=my=0; //FIXME find the one we actually used
3089                             ff_mpeg4_set_direct_mv(s, mx, my);
3090                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3091                             mx= s->mv[1][0][0];
3092                             my= s->mv[1][0][1];
3093                         }else{
3094                             mx= s->mv[0][0][0];
3095                             my= s->mv[0][0][1];
3096                         }
3097
3098                         s->mv_dir= best_s.mv_dir;
3099                         s->mv_type = best_s.mv_type;
3100                         s->mb_intra= 0;
3101 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3102                         s->mv[0][0][1] = best_s.mv[0][0][1];
3103                         s->mv[1][0][0] = best_s.mv[1][0][0];
3104                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3105                         backup_s.dquant= 0;
3106                         s->skipdct=1;
3107                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3108                                         &dmin, &next_block, mx, my);
3109                         s->skipdct=0;
3110                     }
3111                 }
3112
3113                 s->current_picture.qscale_table[xy] = best_s.qscale;
3114
3115                 copy_context_after_encode(s, &best_s, -1);
3116
3117                 pb_bits_count= put_bits_count(&s->pb);
3118                 flush_put_bits(&s->pb);
3119                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3120                 s->pb= backup_s.pb;
3121
3122                 if(s->data_partitioning){
3123                     pb2_bits_count= put_bits_count(&s->pb2);
3124                     flush_put_bits(&s->pb2);
3125                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3126                     s->pb2= backup_s.pb2;
3127
3128                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3129                     flush_put_bits(&s->tex_pb);
3130                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3131                     s->tex_pb= backup_s.tex_pb;
3132                 }
3133                 s->last_bits= put_bits_count(&s->pb);
3134
3135                 if (CONFIG_H263_ENCODER &&
3136                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3137                     ff_h263_update_motion_val(s);
3138
3139                 if(next_block==0){ //FIXME 16 vs linesize16
3140                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3141                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3142                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3143                 }
3144
3145                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3146                     ff_mpv_decode_mb(s, s->block);
3147             } else {
3148                 int motion_x = 0, motion_y = 0;
3149                 s->mv_type=MV_TYPE_16X16;
3150                 // only one MB-Type possible
3151
3152                 switch(mb_type){
3153                 case CANDIDATE_MB_TYPE_INTRA:
3154                     s->mv_dir = 0;
3155                     s->mb_intra= 1;
3156                     motion_x= s->mv[0][0][0] = 0;
3157                     motion_y= s->mv[0][0][1] = 0;
3158                     break;
3159                 case CANDIDATE_MB_TYPE_INTER:
3160                     s->mv_dir = MV_DIR_FORWARD;
3161                     s->mb_intra= 0;
3162                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3163                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3164                     break;
3165                 case CANDIDATE_MB_TYPE_INTER_I:
3166                     s->mv_dir = MV_DIR_FORWARD;
3167                     s->mv_type = MV_TYPE_FIELD;
3168                     s->mb_intra= 0;
3169                     for(i=0; i<2; i++){
3170                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3171                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3172                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3173                     }
3174                     break;
3175                 case CANDIDATE_MB_TYPE_INTER4V:
3176                     s->mv_dir = MV_DIR_FORWARD;
3177                     s->mv_type = MV_TYPE_8X8;
3178                     s->mb_intra= 0;
3179                     for(i=0; i<4; i++){
3180                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3181                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3182                     }
3183                     break;
3184                 case CANDIDATE_MB_TYPE_DIRECT:
3185                     if (CONFIG_MPEG4_ENCODER) {
3186                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3187                         s->mb_intra= 0;
3188                         motion_x=s->b_direct_mv_table[xy][0];
3189                         motion_y=s->b_direct_mv_table[xy][1];
3190                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3191                     }
3192                     break;
3193                 case CANDIDATE_MB_TYPE_DIRECT0:
3194                     if (CONFIG_MPEG4_ENCODER) {
3195                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3196                         s->mb_intra= 0;
3197                         ff_mpeg4_set_direct_mv(s, 0, 0);
3198                     }
3199                     break;
3200                 case CANDIDATE_MB_TYPE_BIDIR:
3201                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3202                     s->mb_intra= 0;
3203                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3204                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3205                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3206                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3207                     break;
3208                 case CANDIDATE_MB_TYPE_BACKWARD:
3209                     s->mv_dir = MV_DIR_BACKWARD;
3210                     s->mb_intra= 0;
3211                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3212                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3213                     break;
3214                 case CANDIDATE_MB_TYPE_FORWARD:
3215                     s->mv_dir = MV_DIR_FORWARD;
3216                     s->mb_intra= 0;
3217                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3218                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3219                     break;
3220                 case CANDIDATE_MB_TYPE_FORWARD_I:
3221                     s->mv_dir = MV_DIR_FORWARD;
3222                     s->mv_type = MV_TYPE_FIELD;
3223                     s->mb_intra= 0;
3224                     for(i=0; i<2; i++){
3225                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3226                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3227                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3228                     }
3229                     break;
3230                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3231                     s->mv_dir = MV_DIR_BACKWARD;
3232                     s->mv_type = MV_TYPE_FIELD;
3233                     s->mb_intra= 0;
3234                     for(i=0; i<2; i++){
3235                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3236                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3237                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3238                     }
3239                     break;
3240                 case CANDIDATE_MB_TYPE_BIDIR_I:
3241                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3242                     s->mv_type = MV_TYPE_FIELD;
3243                     s->mb_intra= 0;
3244                     for(dir=0; dir<2; dir++){
3245                         for(i=0; i<2; i++){
3246                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3247                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3248                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3249                         }
3250                     }
3251                     break;
3252                 default:
3253                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3254                 }
3255
3256                 encode_mb(s, motion_x, motion_y);
3257
3258                 // RAL: Update last macroblock type
3259                 s->last_mv_dir = s->mv_dir;
3260
3261                 if (CONFIG_H263_ENCODER &&
3262                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3263                     ff_h263_update_motion_val(s);
3264
3265                 ff_mpv_decode_mb(s, s->block);
3266             }
3267
3268             /* clean the MV table in IPS frames for direct mode in B frames */
3269             if(s->mb_intra /* && I,P,S_TYPE */){
3270                 s->p_mv_table[xy][0]=0;
3271                 s->p_mv_table[xy][1]=0;
3272             }
3273
3274             if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3275                 int w= 16;
3276                 int h= 16;
3277
3278                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3279                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3280
3281                 s->current_picture.encoding_error[0] += sse(
3282                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3283                     s->dest[0], w, h, s->linesize);
3284                 s->current_picture.encoding_error[1] += sse(
3285                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3286                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3287                 s->current_picture.encoding_error[2] += sse(
3288                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3289                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3290             }
3291             if(s->loop_filter){
3292                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3293                     ff_h263_loop_filter(s);
3294             }
3295             ff_dlog(s->avctx, "MB %d %d bits\n",
3296                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3297         }
3298     }
3299
3300     //not beautiful here but we must write it before flushing so it has to be here
3301     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3302         ff_msmpeg4_encode_ext_header(s);
3303
3304     write_slice_end(s);
3305
3306 #if FF_API_RTP_CALLBACK
3307 FF_DISABLE_DEPRECATION_WARNINGS
3308     /* Send the last GOB if RTP */
3309     if (s->avctx->rtp_callback) {
3310         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3311         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3312         /* Call the RTP callback to send the last GOB */
3313         emms_c();
3314         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3315     }
3316 FF_ENABLE_DEPRECATION_WARNINGS
3317 #endif
3318
3319     return 0;
3320 }
3321
3322 #define MERGE(field) dst->field += src->field; src->field=0
3323 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3324     MERGE(me.scene_change_score);
3325     MERGE(me.mc_mb_var_sum_temp);
3326     MERGE(me.mb_var_sum_temp);
3327 }
3328
3329 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3330     int i;
3331
3332     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3333     MERGE(dct_count[1]);
3334     MERGE(mv_bits);
3335     MERGE(i_tex_bits);
3336     MERGE(p_tex_bits);
3337     MERGE(i_count);
3338     MERGE(f_count);
3339     MERGE(b_count);
3340     MERGE(skip_count);
3341     MERGE(misc_bits);
3342     MERGE(er.error_count);
3343     MERGE(padding_bug_score);
3344     MERGE(current_picture.encoding_error[0]);
3345     MERGE(current_picture.encoding_error[1]);
3346     MERGE(current_picture.encoding_error[2]);
3347
3348     if (dst->noise_reduction){
3349         for(i=0; i<64; i++){
3350             MERGE(dct_error_sum[0][i]);
3351             MERGE(dct_error_sum[1][i]);
3352         }
3353     }
3354
3355     assert(put_bits_count(&src->pb) % 8 ==0);
3356     assert(put_bits_count(&dst->pb) % 8 ==0);
3357     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3358     flush_put_bits(&dst->pb);
3359 }
3360
3361 static int estimate_qp(MpegEncContext *s, int dry_run){
3362     if (s->next_lambda){
3363         s->current_picture_ptr->f->quality =
3364         s->current_picture.f->quality = s->next_lambda;
3365         if(!dry_run) s->next_lambda= 0;
3366     } else if (!s->fixed_qscale) {
3367         s->current_picture_ptr->f->quality =
3368         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3369         if (s->current_picture.f->quality < 0)
3370             return -1;
3371     }
3372
3373     if(s->adaptive_quant){
3374         switch(s->codec_id){
3375         case AV_CODEC_ID_MPEG4:
3376             if (CONFIG_MPEG4_ENCODER)
3377                 ff_clean_mpeg4_qscales(s);
3378             break;
3379         case AV_CODEC_ID_H263:
3380         case AV_CODEC_ID_H263P:
3381         case AV_CODEC_ID_FLV1:
3382             if (CONFIG_H263_ENCODER)
3383                 ff_clean_h263_qscales(s);
3384             break;
3385         default:
3386             ff_init_qscale_tab(s);
3387         }
3388
3389         s->lambda= s->lambda_table[0];
3390         //FIXME broken
3391     }else
3392         s->lambda = s->current_picture.f->quality;
3393     update_qscale(s);
3394     return 0;
3395 }
3396
3397 /* must be called before writing the header */
3398 static void set_frame_distances(MpegEncContext * s){
3399     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3400     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3401
3402     if(s->pict_type==AV_PICTURE_TYPE_B){
3403         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3404         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3405     }else{
3406         s->pp_time= s->time - s->last_non_b_time;
3407         s->last_non_b_time= s->time;
3408         assert(s->picture_number==0 || s->pp_time > 0);
3409     }
3410 }
3411
/**
 * Encode one picture into s->pb.
 *
 * In order, the function: sets up timing and rounding state, picks a
 * preliminary lambda, runs motion estimation (or marks every macroblock as
 * intra for I pictures) through avctx->execute(), merges the motion
 * estimation statistics, optionally turns a P picture into an I picture on
 * a scene change, selects f_code/b_code and fixes over-long vectors,
 * runs estimate_qp(), writes the picture header for the
 * output format and finally runs encode_thread through avctx->execute()
 * and merges the per-context results.
 *
 * @param s              encoder context
 * @param picture_number stored in s->picture_number and passed to the
 *                       header writers
 * @return 0 on success; -1 if estimate_qp() or ff_init_me() fails; the
 *         negative value returned by ff_update_duplicate_context() or
 *         ff_rv10_encode_picture_header() if one of those fails
 */
3412 static int encode_picture(MpegEncContext *s, int picture_number)
3413 {
3414     int i, ret;
3415     int bits;
3416     int context_count = s->slice_context_count;
3417
3418     s->picture_number = picture_number;
3419
3420     /* Reset the average MB variance */
3421     s->me.mb_var_sum_temp    =
3422     s->me.mc_mb_var_sum_temp = 0;
3423
3424     /* we need to initialize some time vars before we can encode b-frames */
3425     // RAL: Condition added for MPEG1VIDEO
3426     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3427         set_frame_distances(s);
3428     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3429         ff_set_mpeg4_time(s);
3430
3431     s->me.scene_change_score=0;
3432
3433 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3434
     /* I pictures set no_rounding to a fixed value; other non-B pictures
      * toggle it for H263P, MPEG4 or when flipflop_rounding is set */
3435     if(s->pict_type==AV_PICTURE_TYPE_I){
3436         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3437         else                        s->no_rounding=0;
3438     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3439         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3440             s->no_rounding ^= 1;
3441     }
3442
     /* preliminary lambda for motion estimation: a dry-run estimate_qp() in
      * pass 2, otherwise the last lambda used for this kind of picture
      * unless the QSCALE flag is set */
3443     if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3444         if (estimate_qp(s,1) < 0)
3445             return -1;
3446         ff_get_2pass_fcode(s);
3447     } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3448         if(s->pict_type==AV_PICTURE_TYPE_B)
3449             s->lambda= s->last_lambda_for[s->pict_type];
3450         else
3451             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3452         update_qscale(s);
3453     }
3454
3455     s->mb_intra=0; //for the rate distortion & bit compare functions
     /* contexts 1..context_count-1 are refreshed from s;
      * NOTE(review): presumably thread_context[0] is s itself - confirm */
3456     for(i=1; i<context_count; i++){
3457         ret = ff_update_duplicate_context(s->thread_context[i], s);
3458         if (ret < 0)
3459             return ret;
3460     }
3461
3462     if(ff_init_me(s)<0)
3463         return -1;
3464
3465     /* Estimate motion for every MB */
3466     if(s->pict_type != AV_PICTURE_TYPE_I){
         /* scale lambda and lambda2 by me_penalty_compensation / 256, rounded */
3467         s->lambda  = (s->lambda  * s->me_penalty_compensation + 128) >> 8;
3468         s->lambda2 = (s->lambda2 * (int64_t) s->me_penalty_compensation + 128) >> 8;
3469         if (s->pict_type != AV_PICTURE_TYPE_B) {
3470             if ((s->me_pre && s->last_non_b_pict_type == AV_PICTURE_TYPE_I) ||
3471                 s->me_pre == 2) {
3472                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3473             }
3474         }
3475
3476         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3477     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3478         /* I-Frame */
3479         for(i=0; i<s->mb_stride*s->mb_height; i++)
3480             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3481
3482         if(!s->fixed_qscale){
3483             /* finding spatial complexity for I-frame rate control */
3484             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3485         }
3486     }
     /* collect the motion estimation statistics of the other contexts in s */
3487     for(i=1; i<context_count; i++){
3488         merge_context_after_me(s, s->thread_context[i]);
3489     }
3490     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3491     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3492     emms_c();
3493
     /* scene change: a P picture whose score exceeds the threshold is
      * re-typed to I and every macroblock is marked as intra */
3494     if (s->me.scene_change_score > s->scenechange_threshold &&
3495         s->pict_type == AV_PICTURE_TYPE_P) {
3496         s->pict_type= AV_PICTURE_TYPE_I;
3497         for(i=0; i<s->mb_stride*s->mb_height; i++)
3498             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3499         ff_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3500                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3501     }
3502
     /* f_code/b_code selection and fixing of over-long vectors; skipped
      * entirely when umvplus is set */
3503     if(!s->umvplus){
3504         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3505             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3506
3507             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3508                 int a,b;
3509                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3510                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3511                 s->f_code= FFMAX3(s->f_code, a, b);
3512             }
3513
3514             ff_fix_long_p_mvs(s);
3515             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3516             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3517                 int j;
3518                 for(i=0; i<2; i++){
3519                     for(j=0; j<2; j++)
3520                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3521                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3522                 }
3523             }
3524         }
3525
3526         if(s->pict_type==AV_PICTURE_TYPE_B){
3527             int a, b;
3528
             /* f_code covers the forward tables, b_code the backward tables */
3529             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3530             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3531             s->f_code = FFMAX(a, b);
3532
3533             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3534             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3535             s->b_code = FFMAX(a, b);
3536
3537             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3538             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3539             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3540             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3541             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3542                 int dir, j;
3543                 for(dir=0; dir<2; dir++){
3544                     for(i=0; i<2; i++){
3545                         for(j=0; j<2; j++){
3546                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3547                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3548                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3549                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3550                         }
3551                     }
3552                 }
3553             }
3554         }
3555     }
3556
     /* non-dry-run estimate_qp() call, after motion estimation */
3557     if (estimate_qp(s, 0) < 0)
3558         return -1;
3559
3560     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3561         s->pict_type == AV_PICTURE_TYPE_I &&
3562         !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3563         s->qscale= 3; //reduce clipping problems
3564
3565     if (s->out_format == FMT_MJPEG) {
3566         /* for mjpeg, we do include qscale in the matrix */
3567         for(i=1;i<64;i++){
3568             int j = s->idsp.idct_permutation[i];
3569
3570             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3571         }
3572         s->y_dc_scale_table=
3573         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3574         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3575         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3576                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
         /* qscale is already folded into the matrix above, so it is fixed to 8 */
3577         s->qscale= 8;
3578     }
3579
3580     //FIXME var duplication
3581     s->current_picture_ptr->f->key_frame =
3582     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3583     s->current_picture_ptr->f->pict_type =
3584     s->current_picture.f->pict_type = s->pict_type;
3585
3586     if (s->current_picture.f->key_frame)
3587         s->picture_in_gop_number=0;
3588
     /* write the picture header for the output format; header_bits below
      * is the number of bits this added to s->pb */
3589     s->last_bits= put_bits_count(&s->pb);
3590     switch(s->out_format) {
3591     case FMT_MJPEG:
3592         if (CONFIG_MJPEG_ENCODER)
3593             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3594                                            s->pred, s->intra_matrix);
3595         break;
3596     case FMT_H261:
3597         if (CONFIG_H261_ENCODER)
3598             ff_h261_encode_picture_header(s, picture_number);
3599         break;
3600     case FMT_H263:
3601         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3602             ff_wmv2_encode_picture_header(s, picture_number);
3603         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3604             ff_msmpeg4_encode_picture_header(s, picture_number);
3605         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3606             ff_mpeg4_encode_picture_header(s, picture_number);
3607         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3608             ret = ff_rv10_encode_picture_header(s, picture_number);
3609             if (ret < 0)
3610                 return ret;
3611         }
3612         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3613             ff_rv20_encode_picture_header(s, picture_number);
3614         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3615             ff_flv_encode_picture_header(s, picture_number);
3616         else if (CONFIG_H263_ENCODER)
3617             ff_h263_encode_picture_header(s, picture_number);
3618         break;
3619     case FMT_MPEG1:
3620         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3621             ff_mpeg1_encode_picture_header(s, picture_number);
3622         break;
3623     default:
3624         assert(0);
3625     }
3626     bits= put_bits_count(&s->pb);
3627     s->header_bits= bits - s->last_bits;
3628
     /* refresh the other contexts from s, encode, then merge their
      * statistics and bitstreams back into s */
3629     for(i=1; i<context_count; i++){
3630         update_duplicate_context_after_me(s->thread_context[i], s);
3631     }
3632     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3633     for(i=1; i<context_count; i++){
3634         merge_context_after_encode(s, s->thread_context[i]);
3635     }
3636     emms_c();
3637     return 0;
3638 }
3639
3640 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3641     const int intra= s->mb_intra;
3642     int i;
3643
3644     s->dct_count[intra]++;
3645
3646     for(i=0; i<64; i++){
3647         int level= block[i];
3648
3649         if(level){
3650             if(level>0){
3651                 s->dct_error_sum[intra][i] += level;
3652                 level -= s->dct_offset[intra][i];
3653                 if(level<0) level=0;
3654             }else{
3655                 s->dct_error_sum[intra][i] -= level;
3656                 level += s->dct_offset[intra][i];
3657                 if(level>0) level=0;
3658             }
3659             block[i]= level;
3660         }
3661     }
3662 }
3663
3664 static int dct_quantize_trellis_c(MpegEncContext *s,
3665                                   int16_t *block, int n,
3666                                   int qscale, int *overflow){
3667     const int *qmat;
3668     const uint8_t *scantable= s->intra_scantable.scantable;
3669     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3670     int max=0;
3671     unsigned int threshold1, threshold2;
3672     int bias=0;
3673     int run_tab[65];
3674     int level_tab[65];
3675     int score_tab[65];
3676     int survivor[65];
3677     int survivor_count;
3678     int last_run=0;
3679     int last_level=0;
3680     int last_score= 0;
3681     int last_i;
3682     int coeff[2][64];
3683     int coeff_count[64];
3684     int qmul, qadd, start_i, last_non_zero, i, dc;
3685     const int esc_length= s->ac_esc_length;
3686     uint8_t * length;
3687     uint8_t * last_length;
3688     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3689
3690     s->fdsp.fdct(block);
3691
3692     if(s->dct_error_sum)
3693         s->denoise_dct(s, block);
3694     qmul= qscale*16;
3695     qadd= ((qscale-1)|1)*8;
3696
3697     if (s->mb_intra) {
3698         int q;
3699         if (!s->h263_aic) {
3700             if (n < 4)
3701                 q = s->y_dc_scale;
3702             else
3703                 q = s->c_dc_scale;
3704             q = q << 3;
3705         } else{
3706             /* For AIC we skip quant/dequant of INTRADC */
3707             q = 1 << 3;
3708             qadd=0;
3709         }
3710
3711         /* note: block[0] is assumed to be positive */
3712         block[0] = (block[0] + (q >> 1)) / q;
3713         start_i = 1;
3714         last_non_zero = 0;
3715         qmat = s->q_intra_matrix[qscale];
3716         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3717             bias= 1<<(QMAT_SHIFT-1);
3718         length     = s->intra_ac_vlc_length;
3719         last_length= s->intra_ac_vlc_last_length;
3720     } else {
3721         start_i = 0;
3722         last_non_zero = -1;
3723         qmat = s->q_inter_matrix[qscale];
3724         length     = s->inter_ac_vlc_length;
3725         last_length= s->inter_ac_vlc_last_length;
3726     }
3727     last_i= start_i;
3728
3729     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3730     threshold2= (threshold1<<1);
3731
3732     for(i=63; i>=start_i; i--) {
3733         const int j = scantable[i];
3734         int level = block[j] * qmat[j];
3735
3736         if(((unsigned)(level+threshold1))>threshold2){
3737             last_non_zero = i;
3738             break;
3739         }
3740     }
3741
3742     for(i=start_i; i<=last_non_zero; i++) {
3743         const int j = scantable[i];
3744         int level = block[j] * qmat[j];
3745
3746 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3747 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3748         if(((unsigned)(level+threshold1))>threshold2){
3749             if(level>0){
3750                 level= (bias + level)>>QMAT_SHIFT;
3751                 coeff[0][i]= level;
3752                 coeff[1][i]= level-1;
3753 //                coeff[2][k]= level-2;
3754             }else{
3755                 level= (bias - level)>>QMAT_SHIFT;
3756                 coeff[0][i]= -level;
3757                 coeff[1][i]= -level+1;
3758 //                coeff[2][k]= -level+2;
3759             }
3760             coeff_count[i]= FFMIN(level, 2);
3761             assert(coeff_count[i]);
3762             max |=level;
3763         }else{
3764             coeff[0][i]= (level>>31)|1;
3765             coeff_count[i]= 1;
3766         }
3767     }
3768
3769     *overflow= s->max_qcoeff < max; //overflow might have happened
3770
3771     if(last_non_zero < start_i){
3772         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3773         return last_non_zero;
3774     }
3775
3776     score_tab[start_i]= 0;
3777     survivor[0]= start_i;
3778     survivor_count= 1;
3779
3780     for(i=start_i; i<=last_non_zero; i++){
3781         int level_index, j, zero_distortion;
3782         int dct_coeff= FFABS(block[ scantable[i] ]);
3783         int best_score=256*256*256*120;
3784
3785         if (s->fdsp.fdct == ff_fdct_ifast)
3786             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3787         zero_distortion= dct_coeff*dct_coeff;
3788
3789         for(level_index=0; level_index < coeff_count[i]; level_index++){
3790             int distortion;
3791             int level= coeff[level_index][i];
3792             const int alevel= FFABS(level);
3793             int unquant_coeff;
3794
3795             assert(level);
3796
3797             if(s->out_format == FMT_H263){
3798                 unquant_coeff= alevel*qmul + qadd;
3799             }else{ //MPEG1
3800                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3801                 if(s->mb_intra){
3802                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3803                         unquant_coeff =   (unquant_coeff - 1) | 1;
3804                 }else{
3805                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3806                         unquant_coeff =   (unquant_coeff - 1) | 1;
3807                 }
3808                 unquant_coeff<<= 3;
3809             }
3810
3811             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3812             level+=64;
3813             if((level&(~127)) == 0){
3814                 for(j=survivor_count-1; j>=0; j--){
3815                     int run= i - survivor[j];
3816                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3817                     score += score_tab[i-run];
3818
3819                     if(score < best_score){
3820                         best_score= score;
3821                         run_tab[i+1]= run;
3822                         level_tab[i+1]= level-64;
3823                     }
3824                 }
3825
3826                 if(s->out_format == FMT_H263){
3827                     for(j=survivor_count-1; j>=0; j--){
3828                         int run= i - survivor[j];
3829                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3830                         score += score_tab[i-run];
3831                         if(score < last_score){
3832                             last_score= score;
3833                             last_run= run;
3834                             last_level= level-64;
3835                             last_i= i+1;
3836                         }
3837                     }
3838                 }
3839             }else{
3840                 distortion += esc_length*lambda;
3841                 for(j=survivor_count-1; j>=0; j--){
3842                     int run= i - survivor[j];
3843                     int score= distortion + score_tab[i-run];
3844
3845                     if(score < best_score){
3846                         best_score= score;
3847                         run_tab[i+1]= run;
3848                         level_tab[i+1]= level-64;
3849                     }
3850                 }
3851
3852                 if(s->out_format == FMT_H263){
3853                   for(j=survivor_count-1; j>=0; j--){
3854                         int run= i - survivor[j];
3855                         int score= distortion + score_tab[i-run];
3856                         if(score < last_score){
3857                             last_score= score;
3858                             last_run= run;
3859                             last_level= level-64;
3860                             last_i= i+1;
3861                         }
3862                     }
3863                 }
3864             }
3865         }
3866
3867         score_tab[i+1]= best_score;
3868
3869         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3870         if(last_non_zero <= 27){
3871             for(; survivor_count; survivor_count--){
3872                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3873                     break;
3874             }
3875         }else{
3876             for(; survivor_count; survivor_count--){
3877                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3878                     break;
3879             }
3880         }
3881
3882         survivor[ survivor_count++ ]= i+1;
3883     }
3884
3885     if(s->out_format != FMT_H263){
3886         last_score= 256*256*256*120;
3887         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3888             int score= score_tab[i];
3889             if(i) score += lambda*2; //FIXME exacter?
3890
3891             if(score < last_score){
3892                 last_score= score;
3893                 last_i= i;
3894                 last_level= level_tab[i];
3895                 last_run= run_tab[i];
3896             }
3897         }
3898     }
3899
3900     s->coded_score[n] = last_score;
3901
3902     dc= FFABS(block[0]);
3903     last_non_zero= last_i - 1;
3904     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3905
3906     if(last_non_zero < start_i)
3907         return last_non_zero;
3908
3909     if(last_non_zero == 0 && start_i == 0){
3910         int best_level= 0;
3911         int best_score= dc * dc;
3912
3913         for(i=0; i<coeff_count[0]; i++){
3914             int level= coeff[i][0];
3915             int alevel= FFABS(level);
3916             int unquant_coeff, score, distortion;
3917
3918             if(s->out_format == FMT_H263){
3919                     unquant_coeff= (alevel*qmul + qadd)>>3;
3920             }else{ //MPEG1
3921                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3922                     unquant_coeff =   (unquant_coeff - 1) | 1;
3923             }
3924             unquant_coeff = (unquant_coeff + 4) >> 3;
3925             unquant_coeff<<= 3 + 3;
3926
3927             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3928             level+=64;
3929             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3930             else                    score= distortion + esc_length*lambda;
3931
3932             if(score < best_score){
3933                 best_score= score;
3934                 best_level= level - 64;
3935             }
3936         }
3937         block[0]= best_level;
3938         s->coded_score[n] = best_score - dc*dc;
3939         if(best_level == 0) return -1;
3940         else                return last_non_zero;
3941     }
3942
3943     i= last_i;
3944     assert(last_level);
3945
3946     block[ perm_scantable[last_non_zero] ]= last_level;
3947     i -= last_run + 1;
3948
3949     for(; i>start_i; i -= run_tab[i] + 1){
3950         block[ perm_scantable[i-1] ]= level_tab[i];
3951     }
3952
3953     return last_non_zero;
3954 }
3955
3956 //#define REFINE_STATS 1
3957 static int16_t basis[64][64];
3958
3959 static void build_basis(uint8_t *perm){
3960     int i, j, x, y;
3961     emms_c();
3962     for(i=0; i<8; i++){
3963         for(j=0; j<8; j++){
3964             for(y=0; y<8; y++){
3965                 for(x=0; x<8; x++){
3966                     double s= 0.25*(1<<BASIS_SHIFT);
3967                     int index= 8*i + j;
3968                     int perm_index= perm[index];
3969                     if(i==0) s*= sqrt(0.5);
3970                     if(j==0) s*= sqrt(0.5);
3971                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3972                 }
3973             }
3974         }
3975     }
3976 }
3977
3978 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3979                         int16_t *block, int16_t *weight, int16_t *orig,
3980                         int n, int qscale){
3981     int16_t rem[64];
3982     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3983     const uint8_t *scantable= s->intra_scantable.scantable;
3984     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3985 //    unsigned int threshold1, threshold2;
3986 //    int bias=0;
3987     int run_tab[65];
3988     int prev_run=0;
3989     int prev_level=0;
3990     int qmul, qadd, start_i, last_non_zero, i, dc;
3991     uint8_t * length;
3992     uint8_t * last_length;
3993     int lambda;
3994     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3995 #ifdef REFINE_STATS
3996 static int count=0;
3997 static int after_last=0;
3998 static int to_zero=0;
3999 static int from_zero=0;
4000 static int raise=0;
4001 static int lower=0;
4002 static int messed_sign=0;
4003 #endif
4004
4005     if(basis[0][0] == 0)
4006         build_basis(s->idsp.idct_permutation);
4007
4008     qmul= qscale*2;
4009     qadd= (qscale-1)|1;
4010     if (s->mb_intra) {
4011         if (!s->h263_aic) {
4012             if (n < 4)
4013                 q = s->y_dc_scale;
4014             else
4015                 q = s->c_dc_scale;
4016         } else{
4017             /* For AIC we skip quant/dequant of INTRADC */
4018             q = 1;
4019             qadd=0;
4020         }
4021         q <<= RECON_SHIFT-3;
4022         /* note: block[0] is assumed to be positive */
4023         dc= block[0]*q;
4024 //        block[0] = (block[0] + (q >> 1)) / q;
4025         start_i = 1;
4026 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4027 //            bias= 1<<(QMAT_SHIFT-1);
4028         length     = s->intra_ac_vlc_length;
4029         last_length= s->intra_ac_vlc_last_length;
4030     } else {
4031         dc= 0;
4032         start_i = 0;
4033         length     = s->inter_ac_vlc_length;
4034         last_length= s->inter_ac_vlc_last_length;
4035     }
4036     last_non_zero = s->block_last_index[n];
4037
4038 #ifdef REFINE_STATS
4039 {START_TIMER
4040 #endif
4041     dc += (1<<(RECON_SHIFT-1));
4042     for(i=0; i<64; i++){
4043         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4044     }
4045 #ifdef REFINE_STATS
4046 STOP_TIMER("memset rem[]")}
4047 #endif
4048     sum=0;
4049     for(i=0; i<64; i++){
4050         int one= 36;
4051         int qns=4;
4052         int w;
4053
4054         w= FFABS(weight[i]) + qns*one;
4055         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4056
4057         weight[i] = w;
4058 //        w=weight[i] = (63*qns + (w/2)) / w;
4059
4060         assert(w>0);
4061         assert(w<(1<<6));
4062         sum += w*w;
4063     }
4064     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4065 #ifdef REFINE_STATS
4066 {START_TIMER
4067 #endif
4068     run=0;
4069     rle_index=0;
4070     for(i=start_i; i<=last_non_zero; i++){
4071         int j= perm_scantable[i];
4072         const int level= block[j];
4073         int coeff;
4074
4075         if(level){
4076             if(level<0) coeff= qmul*level - qadd;
4077             else        coeff= qmul*level + qadd;
4078             run_tab[rle_index++]=run;
4079             run=0;
4080
4081             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4082         }else{
4083             run++;
4084         }
4085     }
4086 #ifdef REFINE_STATS
4087 if(last_non_zero>0){
4088 STOP_TIMER("init rem[]")
4089 }
4090 }
4091
4092 {START_TIMER
4093 #endif
4094     for(;;){
4095         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4096         int best_coeff=0;
4097         int best_change=0;
4098         int run2, best_unquant_change=0, analyze_gradient;
4099 #ifdef REFINE_STATS
4100 {START_TIMER
4101 #endif
4102         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4103
4104         if(analyze_gradient){
4105 #ifdef REFINE_STATS
4106 {START_TIMER
4107 #endif
4108             for(i=0; i<64; i++){
4109                 int w= weight[i];
4110
4111                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4112             }
4113 #ifdef REFINE_STATS
4114 STOP_TIMER("rem*w*w")}
4115 {START_TIMER
4116 #endif
4117             s->fdsp.fdct(d1);
4118 #ifdef REFINE_STATS
4119 STOP_TIMER("dct")}
4120 #endif
4121         }
4122
4123         if(start_i){
4124             const int level= block[0];
4125             int change, old_coeff;
4126
4127             assert(s->mb_intra);
4128
4129             old_coeff= q*level;
4130
4131             for(change=-1; change<=1; change+=2){
4132                 int new_level= level + change;
4133                 int score, new_coeff;
4134
4135                 new_coeff= q*new_level;
4136                 if(new_coeff >= 2048 || new_coeff < 0)
4137                     continue;
4138
4139                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4140                                                   new_coeff - old_coeff);
4141                 if(score<best_score){
4142                     best_score= score;
4143                     best_coeff= 0;
4144                     best_change= change;
4145                     best_unquant_change= new_coeff - old_coeff;
4146                 }
4147             }
4148         }
4149
4150         run=0;
4151         rle_index=0;
4152         run2= run_tab[rle_index++];
4153         prev_level=0;
4154         prev_run=0;
4155
4156         for(i=start_i; i<64; i++){
4157             int j= perm_scantable[i];
4158             const int level= block[j];
4159             int change, old_coeff;
4160
4161             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4162                 break;
4163
4164             if(level){
4165                 if(level<0) old_coeff= qmul*level - qadd;
4166                 else        old_coeff= qmul*level + qadd;
4167                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4168             }else{
4169                 old_coeff=0;
4170                 run2--;
4171                 assert(run2>=0 || i >= last_non_zero );
4172             }
4173
4174             for(change=-1; change<=1; change+=2){
4175                 int new_level= level + change;
4176                 int score, new_coeff, unquant_change;
4177
4178                 score=0;
4179                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4180                    continue;
4181
4182                 if(new_level){
4183                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4184                     else            new_coeff= qmul*new_level + qadd;
4185                     if(new_coeff >= 2048 || new_coeff <= -2048)
4186                         continue;
4187                     //FIXME check for overflow
4188
4189                     if(level){
4190                         if(level < 63 && level > -63){
4191                             if(i < last_non_zero)
4192                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4193                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4194                             else
4195                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4196                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4197                         }
4198                     }else{
4199                         assert(FFABS(new_level)==1);
4200
4201                         if(analyze_gradient){
4202                             int g= d1[ scantable[i] ];
4203                             if(g && (g^new_level) >= 0)
4204                                 continue;
4205                         }
4206
4207                         if(i < last_non_zero){
4208                             int next_i= i + run2 + 1;
4209                             int next_level= block[ perm_scantable[next_i] ] + 64;
4210
4211                             if(next_level&(~127))
4212                                 next_level= 0;
4213
4214                             if(next_i < last_non_zero)
4215                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4216                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4217                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4218                             else
4219                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4220                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4221                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4222                         }else{
4223                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4224                             if(prev_level){
4225                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4226                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4227                             }
4228                         }
4229                     }
4230                 }else{
4231                     new_coeff=0;
4232                     assert(FFABS(level)==1);
4233
4234                     if(i < last_non_zero){
4235                         int next_i= i + run2 + 1;
4236                         int next_level= block[ perm_scantable[next_i] ] + 64;
4237
4238                         if(next_level&(~127))
4239                             next_level= 0;
4240
4241                         if(next_i < last_non_zero)
4242                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4243                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4244                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4245                         else
4246                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4247                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4248                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4249                     }else{
4250                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4251                         if(prev_level){
4252                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4253                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4254                         }
4255                     }
4256                 }
4257
4258                 score *= lambda;
4259
4260                 unquant_change= new_coeff - old_coeff;
4261                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4262
4263                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4264                                                    unquant_change);
4265                 if(score<best_score){
4266                     best_score= score;
4267                     best_coeff= i;
4268                     best_change= change;
4269                     best_unquant_change= unquant_change;
4270                 }
4271             }
4272             if(level){
4273                 prev_level= level + 64;
4274                 if(prev_level&(~127))
4275                     prev_level= 0;
4276                 prev_run= run;
4277                 run=0;
4278             }else{
4279                 run++;
4280             }
4281         }
4282 #ifdef REFINE_STATS
4283 STOP_TIMER("iterative step")}
4284 #endif
4285
4286         if(best_change){
4287             int j= perm_scantable[ best_coeff ];
4288
4289             block[j] += best_change;
4290
4291             if(best_coeff > last_non_zero){
4292                 last_non_zero= best_coeff;
4293                 assert(block[j]);
4294 #ifdef REFINE_STATS
4295 after_last++;
4296 #endif
4297             }else{
4298 #ifdef REFINE_STATS
4299 if(block[j]){
4300     if(block[j] - best_change){
4301         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4302             raise++;
4303         }else{
4304             lower++;
4305         }
4306     }else{
4307         from_zero++;
4308     }
4309 }else{
4310     to_zero++;
4311 }
4312 #endif
4313                 for(; last_non_zero>=start_i; last_non_zero--){
4314                     if(block[perm_scantable[last_non_zero]])
4315                         break;
4316                 }
4317             }
4318 #ifdef REFINE_STATS
4319 count++;
4320 if(256*256*256*64 % count == 0){
4321     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4322 }
4323 #endif
4324             run=0;
4325             rle_index=0;
4326             for(i=start_i; i<=last_non_zero; i++){
4327                 int j= perm_scantable[i];
4328                 const int level= block[j];
4329
4330                  if(level){
4331                      run_tab[rle_index++]=run;
4332                      run=0;
4333                  }else{
4334                      run++;
4335                  }
4336             }
4337
4338             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4339         }else{
4340             break;
4341         }
4342     }
4343 #ifdef REFINE_STATS
4344 if(last_non_zero>0){
4345 STOP_TIMER("iterative search")
4346 }
4347 }
4348 #endif
4349
4350     return last_non_zero;
4351 }
4352
/**
 * Apply an index permutation to the populated part of an 8x8 block.
 *
 * Only the positions named by the first last + 1 scantable entries are
 * moved: the value found at position j ends up at position permutation[j],
 * and every visited source position is cleared first. Nothing is done when
 * last <= 0.
 *
 * @param block       coefficients, permuted in place
 * @param permutation permutation vector, indexed by the current position
 * @param scantable   scan order in use; it only limits which positions are
 *                    touched, the block is not reordered into scan order
 * @param last        scan index of the last non-zero coefficient
 */
static void block_permute(int16_t *block, uint8_t *permutation,
                          const uint8_t *scantable, int last)
{
    int16_t saved[64];
    int k;

    if (last <= 0)
        return;
    //FIXME skipping the work when permutation[1] == 1 would be ok but not
    // clean, and might fail for some permutations

    /* Pass 1: stash every visited coefficient and clear its old slot. */
    k = 0;
    while (k <= last) {
        const int pos = scantable[k];

        saved[pos] = block[pos];
        block[pos] = 0;
        k++;
    }

    /* Pass 2: drop each stashed coefficient into its permuted slot. */
    k = 0;
    while (k <= last) {
        const int pos = scantable[k];

        block[permutation[pos]] = saved[pos];
        k++;
    }
}
4388
4389 int ff_dct_quantize_c(MpegEncContext *s,
4390                         int16_t *block, int n,
4391                         int qscale, int *overflow)
4392 {
4393     int i, j, level, last_non_zero, q, start_i;
4394     const int *qmat;
4395     const uint8_t *scantable= s->intra_scantable.scantable;
4396     int bias;
4397     int max=0;
4398     unsigned int threshold1, threshold2;
4399
4400     s->fdsp.fdct(block);
4401
4402     if(s->dct_error_sum)
4403         s->denoise_dct(s, block);
4404
4405     if (s->mb_intra) {
4406         if (!s->h263_aic) {
4407             if (n < 4)
4408                 q = s->y_dc_scale;
4409             else
4410                 q = s->c_dc_scale;
4411             q = q << 3;
4412         } else
4413             /* For AIC we skip quant/dequant of INTRADC */
4414             q = 1 << 3;
4415
4416         /* note: block[0] is assumed to be positive */
4417         block[0] = (block[0] + (q >> 1)) / q;
4418         start_i = 1;
4419         last_non_zero = 0;
4420         qmat = s->q_intra_matrix[qscale];
4421         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4422     } else {
4423         start_i = 0;
4424         last_non_zero = -1;
4425         qmat = s->q_inter_matrix[qscale];
4426         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4427     }
4428     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4429     threshold2= (threshold1<<1);
4430     for(i=63;i>=start_i;i--) {
4431         j = scantable[i];
4432         level = block[j] * qmat[j];
4433
4434         if(((unsigned)(level+threshold1))>threshold2){
4435             last_non_zero = i;
4436             break;
4437         }else{
4438             block[j]=0;
4439         }
4440     }
4441     for(i=start_i; i<=last_non_zero; i++) {
4442         j = scantable[i];
4443         level = block[j] * qmat[j];
4444
4445 //        if(   bias+level >= (1<<QMAT_SHIFT)
4446 //           || bias-level >= (1<<QMAT_SHIFT)){
4447         if(((unsigned)(level+threshold1))>threshold2){
4448             if(level>0){
4449                 level= (bias + level)>>QMAT_SHIFT;
4450                 block[j]= level;
4451             }else{
4452                 level= (bias - level)>>QMAT_SHIFT;
4453                 block[j]= -level;
4454             }
4455             max |=level;
4456         }else{
4457             block[j]=0;
4458         }
4459     }
4460     *overflow= s->max_qcoeff < max; //overflow might have happened
4461
4462     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4463     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4464         block_permute(block, s->idsp.idct_permutation,
4465                       scantable, last_non_zero);
4466
4467     return last_non_zero;
4468 }
4469
/* Byte offset of a MpegEncContext member, for use in the AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flag set shared by the option tables below. The replacement list is
 * parenthesized so that it stays a single operand in any expression. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4472 static const AVOption h263_options[] = {
4473     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4474     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4475     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4476     FF_MPV_COMMON_OPTS
4477     { NULL },
4478 };
4479
4480 static const AVClass h263_class = {
4481     .class_name = "H.263 encoder",
4482     .item_name  = av_default_item_name,
4483     .option     = h263_options,
4484     .version    = LIBAVUTIL_VERSION_INT,
4485 };
4486
4487 AVCodec ff_h263_encoder = {
4488     .name           = "h263",
4489     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4490     .type           = AVMEDIA_TYPE_VIDEO,
4491     .id             = AV_CODEC_ID_H263,
4492     .priv_data_size = sizeof(MpegEncContext),
4493     .init           = ff_mpv_encode_init,
4494     .encode2        = ff_mpv_encode_picture,
4495     .close          = ff_mpv_encode_end,
4496     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4497     .priv_class     = &h263_class,
4498 };
4499
4500 static const AVOption h263p_options[] = {
4501     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4502     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4503     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4504     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4505     FF_MPV_COMMON_OPTS
4506     { NULL },
4507 };
4508 static const AVClass h263p_class = {
4509     .class_name = "H.263p encoder",
4510     .item_name  = av_default_item_name,
4511     .option     = h263p_options,
4512     .version    = LIBAVUTIL_VERSION_INT,
4513 };
4514
4515 AVCodec ff_h263p_encoder = {
4516     .name           = "h263p",
4517     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4518     .type           = AVMEDIA_TYPE_VIDEO,
4519     .id             = AV_CODEC_ID_H263P,
4520     .priv_data_size = sizeof(MpegEncContext),
4521     .init           = ff_mpv_encode_init,
4522     .encode2        = ff_mpv_encode_picture,
4523     .close          = ff_mpv_encode_end,
4524     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
4525     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4526     .priv_class     = &h263p_class,
4527 };
4528
4529 static const AVClass msmpeg4v2_class = {
4530     .class_name = "msmpeg4v2 encoder",
4531     .item_name  = av_default_item_name,
4532     .option     = ff_mpv_generic_options,
4533     .version    = LIBAVUTIL_VERSION_INT,
4534 };
4535
4536 AVCodec ff_msmpeg4v2_encoder = {
4537     .name           = "msmpeg4v2",
4538     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4539     .type           = AVMEDIA_TYPE_VIDEO,
4540     .id             = AV_CODEC_ID_MSMPEG4V2,
4541     .priv_data_size = sizeof(MpegEncContext),
4542     .init           = ff_mpv_encode_init,
4543     .encode2        = ff_mpv_encode_picture,
4544     .close          = ff_mpv_encode_end,
4545     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4546     .priv_class     = &msmpeg4v2_class,
4547 };
4548
4549 static const AVClass msmpeg4v3_class = {
4550     .class_name = "msmpeg4v3 encoder",
4551     .item_name  = av_default_item_name,
4552     .option     = ff_mpv_generic_options,
4553     .version    = LIBAVUTIL_VERSION_INT,
4554 };
4555
4556 AVCodec ff_msmpeg4v3_encoder = {
4557     .name           = "msmpeg4",
4558     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4559     .type           = AVMEDIA_TYPE_VIDEO,
4560     .id             = AV_CODEC_ID_MSMPEG4V3,
4561     .priv_data_size = sizeof(MpegEncContext),
4562     .init           = ff_mpv_encode_init,
4563     .encode2        = ff_mpv_encode_picture,
4564     .close          = ff_mpv_encode_end,
4565     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4566     .priv_class     = &msmpeg4v3_class,
4567 };
4568
4569 static const AVClass wmv1_class = {
4570     .class_name = "wmv1 encoder",
4571     .item_name  = av_default_item_name,
4572     .option     = ff_mpv_generic_options,
4573     .version    = LIBAVUTIL_VERSION_INT,
4574 };
4575
4576 AVCodec ff_wmv1_encoder = {
4577     .name           = "wmv1",
4578     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4579     .type           = AVMEDIA_TYPE_VIDEO,
4580     .id             = AV_CODEC_ID_WMV1,
4581     .priv_data_size = sizeof(MpegEncContext),
4582     .init           = ff_mpv_encode_init,
4583     .encode2        = ff_mpv_encode_picture,
4584     .close          = ff_mpv_encode_end,
4585     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4586     .priv_class     = &wmv1_class,
4587 };