git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of Libav.
   9  *
  10  * Libav is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * Libav is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with Libav; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "idctdsp.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "mpegvideodata.h"
  44 #include "h261.h"
  45 #include "h263.h"
  46 #include "h263data.h"
  47 #include "mjpegenc_common.h"
  48 #include "mathops.h"
  49 #include "mpegutils.h"
  50 #include "mjpegenc.h"
  51 #include "msmpeg4.h"
  52 #include "pixblockdsp.h"
  53 #include "qpeldsp.h"
  54 #include "faandct.h"
  55 #include "thread.h"
  56 #include "aandcttab.h"
  57 #include "flv.h"
  58 #include "mpeg4video.h"
  59 #include "internal.h"
  60 #include "bytestream.h"
  61 #include "wmv2.h"
  62 #include "rv10.h"
  63 #include <limits.h>
  64
/* Number of fractional bits carried by the intra/inter quantizer bias values;
 * ff_convert_matrix() rescales a bias by (16 - QUANT_BIAS_SHIFT) bits before
 * dividing it by the 16-bit multiplier. */
#define QUANT_BIAS_SHIFT 8

/* Fixed-point shift used for the reciprocals stored in the 16-bit qmat16
 * tables filled by ff_convert_matrix(). */
#define QMAT_SHIFT_MMX 16
/* Fixed-point shift used for the reciprocals stored in the int qmat tables
 * filled by ff_convert_matrix(). */
#define QMAT_SHIFT 22

/* Forward declarations of static helpers that are referenced before their
 * definitions. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* Default tables that mpv_encode_defaults() installs as s->me.mv_penalty and
 * s->fcode_tab respectively. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

/* Option table made of the shared FF_MPV_COMMON_OPTS entries only, closed by
 * a NULL terminator entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
  83
  84 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
  85                        uint16_t (*qmat16)[2][64],
  86                        const uint16_t *quant_matrix,
  87                        int bias, int qmin, int qmax, int intra)
  88 {
  89     FDCTDSPContext *fdsp = &s->fdsp;
  90     int qscale;
  91     int shift = 0;
  92
  93     for (qscale = qmin; qscale <= qmax; qscale++) {
  94         int i;
  95         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
  96 #if CONFIG_FAANDCT
  97             fdsp->fdct == ff_faandct            ||
  98 #endif /* CONFIG_FAANDCT */
  99             fdsp->fdct == ff_jpeg_fdct_islow_10) {
 100             for (i = 0; i < 64; i++) {
 101                 const int j = s->idsp.idct_permutation[i];
 102                 int64_t den = (int64_t) qscale * quant_matrix[j];
 103                 /* 16 <= qscale * quant_matrix[i] <= 7905
 104                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 105                  *             19952 <=              x  <= 249205026
 106                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 107                  *           3444240 >= (1 << 36) / (x) >= 275 */
 108
 109                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 110             }
 111         } else if (fdsp->fdct == ff_fdct_ifast) {
 112             for (i = 0; i < 64; i++) {
 113                 const int j = s->idsp.idct_permutation[i];
 114                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
 115                 /* 16 <= qscale * quant_matrix[i] <= 7905
 116                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 117                  *             19952 <=              x  <= 249205026
 118                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 119                  *           3444240 >= (1 << 36) / (x) >= 275 */
 120
 121                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
 122             }
 123         } else {
 124             for (i = 0; i < 64; i++) {
 125                 const int j = s->idsp.idct_permutation[i];
 126                 int64_t den = (int64_t) qscale * quant_matrix[j];
 127                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 128                  * Assume x = qscale * quant_matrix[i]
 129                  * So             16 <=              x  <= 7905
 130                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 131                  * so          32768 >= (1 << 19) / (x) >= 67 */
 132                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 133                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 134                 //                    (qscale * quant_matrix[i]);
 135                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
 136
 137                 if (qmat16[qscale][0][i] == 0 ||
 138                     qmat16[qscale][0][i] == 128 * 256)
 139                     qmat16[qscale][0][i] = 128 * 256 - 1;
 140                 qmat16[qscale][1][i] =
 141                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 142                                 qmat16[qscale][0][i]);
 143             }
 144         }
 145
 146         for (i = intra; i < 64; i++) {
 147             int64_t max = 8191;
 148             if (fdsp->fdct == ff_fdct_ifast) {
 149                 max = (8191LL * ff_aanscales[i]) >> 14;
 150             }
 151             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 152                 shift++;
 153             }
 154         }
 155     }
 156     if (shift) {
 157         av_log(NULL, AV_LOG_INFO,
 158                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 159                QMAT_SHIFT - shift);
 160     }
 161 }
 162
 163 static inline void update_qscale(MpegEncContext *s)
 164 {
 165     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 166                 (FF_LAMBDA_SHIFT + 7);
 167     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 168
 169     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 170                  FF_LAMBDA_SHIFT;
 171 }
 172
 173 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 174 {
 175     int i;
 176
 177     if (matrix) {
 178         put_bits(pb, 1, 1);
 179         for (i = 0; i < 64; i++) {
 180             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 181         }
 182     } else
 183         put_bits(pb, 1, 0);
 184 }
 185
 186 /**
 187  * init s->current_picture.qscale_table from s->lambda_table
 188  */
 189 void ff_init_qscale_tab(MpegEncContext *s)
 190 {
 191     int8_t * const qscale_table = s->current_picture.qscale_table;
 192     int i;
 193
 194     for (i = 0; i < s->mb_num; i++) {
 195         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 196         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 197         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 198                                                   s->avctx->qmax);
 199     }
 200 }
 201
 202 static void update_duplicate_context_after_me(MpegEncContext *dst,
 203                                               MpegEncContext *src)
 204 {
 205 #define COPY(a) dst->a= src->a
 206     COPY(pict_type);
 207     COPY(current_picture);
 208     COPY(f_code);
 209     COPY(b_code);
 210     COPY(qscale);
 211     COPY(lambda);
 212     COPY(lambda2);
 213     COPY(picture_in_gop_number);
 214     COPY(gop_picture_number);
 215     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 216     COPY(progressive_frame);    // FIXME don't set in encode_header
 217     COPY(partitioned_frame);    // FIXME don't set in encode_header
 218 #undef COPY
 219 }
 220
 221 /**
 222  * Set the given MpegEncContext to defaults for encoding.
 223  * the changed fields will not depend upon the prior state of the MpegEncContext.
 224  */
 225 static void mpv_encode_defaults(MpegEncContext *s)
 226 {
 227     int i;
 228     ff_mpv_common_defaults(s);
 229
 230     for (i = -16; i < 16; i++) {
 231         default_fcode_tab[i + MAX_MV] = 1;
 232     }
 233     s->me.mv_penalty = default_mv_penalty;
 234     s->fcode_tab     = default_fcode_tab;
 235
 236     s->input_picture_number  = 0;
 237     s->picture_in_gop_number = 0;
 238 }
 239
 240 /* init video encoder */
 241 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
 242 {
 243     MpegEncContext *s = avctx->priv_data;
 244     AVCPBProperties *cpb_props;
 245     int i, ret, format_supported;
 246
 247     mpv_encode_defaults(s);
 248
 249     switch (avctx->codec_id) {
 250     case AV_CODEC_ID_MPEG2VIDEO:
 251         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 252             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 253             av_log(avctx, AV_LOG_ERROR,
 254                    "only YUV420 and YUV422 are supported\n");
 255             return -1;
 256         }
 257         break;
 258     case AV_CODEC_ID_MJPEG:
 259         format_supported = 0;
 260         /* JPEG color space */
 261         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
 262             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
 263             (avctx->color_range == AVCOL_RANGE_JPEG &&
 264              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 265               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
 266             format_supported = 1;
 267         /* MPEG color space */
 268         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
 269                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 270                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
 271             format_supported = 1;
 272
 273         if (!format_supported) {
 274             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 275             return -1;
 276         }
 277         break;
 278     default:
 279         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 280             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 281             return -1;
 282         }
 283     }
 284
 285     switch (avctx->pix_fmt) {
 286     case AV_PIX_FMT_YUVJ422P:
 287     case AV_PIX_FMT_YUV422P:
 288         s->chroma_format = CHROMA_422;
 289         break;
 290     case AV_PIX_FMT_YUVJ420P:
 291     case AV_PIX_FMT_YUV420P:
 292     default:
 293         s->chroma_format = CHROMA_420;
 294         break;
 295     }
 296
 297     s->bit_rate = avctx->bit_rate;
 298     s->width    = avctx->width;
 299     s->height   = avctx->height;
 300     if (avctx->gop_size > 600 &&
 301         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 302         av_log(avctx, AV_LOG_ERROR,
 303                "Warning keyframe interval too large! reducing it ...\n");
 304         avctx->gop_size = 600;
 305     }
 306     s->gop_size     = avctx->gop_size;
 307     s->avctx        = avctx;
 308     if (avctx->max_b_frames > MAX_B_FRAMES) {
 309         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 310                "is %d.\n", MAX_B_FRAMES);
 311     }
 312     s->max_b_frames = avctx->max_b_frames;
 313     s->codec_id     = avctx->codec->id;
 314     s->strict_std_compliance = avctx->strict_std_compliance;
 315     s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
 316     s->mpeg_quant         = avctx->mpeg_quant;
 317     s->rtp_mode           = !!avctx->rtp_payload_size;
 318     s->intra_dc_precision = avctx->intra_dc_precision;
 319     s->user_specified_pts = AV_NOPTS_VALUE;
 320
 321     if (s->gop_size <= 1) {
 322         s->intra_only = 1;
 323         s->gop_size   = 12;
 324     } else {
 325         s->intra_only = 0;
 326     }
 327
 328 #if FF_API_MOTION_EST
 329 FF_DISABLE_DEPRECATION_WARNINGS
 330     s->me_method = avctx->me_method;
 331 FF_ENABLE_DEPRECATION_WARNINGS
 332 #endif
 333
 334     /* Fixed QSCALE */
 335     s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
 336
 337 #if FF_API_MPV_OPT
 338     FF_DISABLE_DEPRECATION_WARNINGS
 339     if (avctx->border_masking != 0.0)
 340         s->border_masking = avctx->border_masking;
 341     FF_ENABLE_DEPRECATION_WARNINGS
 342 #endif
 343
 344     s->adaptive_quant = (s->avctx->lumi_masking ||
 345                          s->avctx->dark_masking ||
 346                          s->avctx->temporal_cplx_masking ||
 347                          s->avctx->spatial_cplx_masking  ||
 348                          s->avctx->p_masking      ||
 349                          s->border_masking ||
 350                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 351                         !s->fixed_qscale;
 352
 353     s->loop_filter = !!(s->avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
 354
 355     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 356         av_log(avctx, AV_LOG_ERROR,
 357                "a vbv buffer size is needed, "
 358                "for encoding with a maximum bitrate\n");
 359         return -1;
 360     }
 361
 362     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 363         av_log(avctx, AV_LOG_INFO,
 364                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 365     }
 366
 367     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 368         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 369         return -1;
 370     }
 371
 372     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 373         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
 374         return -1;
 375     }
 376
 377     if (avctx->rc_max_rate &&
 378         avctx->rc_max_rate == avctx->bit_rate &&
 379         avctx->rc_max_rate != avctx->rc_min_rate) {
 380         av_log(avctx, AV_LOG_INFO,
 381                "impossible bitrate constraints, this will fail\n");
 382     }
 383
 384     if (avctx->rc_buffer_size &&
 385         avctx->bit_rate * (int64_t)avctx->time_base.num >
 386             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 387         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 388         return -1;
 389     }
 390
 391     if (!s->fixed_qscale &&
 392         avctx->bit_rate * av_q2d(avctx->time_base) >
 393             avctx->bit_rate_tolerance) {
 394         av_log(avctx, AV_LOG_ERROR,
 395                "bitrate tolerance too small for bitrate\n");
 396         return -1;
 397     }
 398
 399     if (s->avctx->rc_max_rate &&
 400         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 401         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 402          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 403         90000LL * (avctx->rc_buffer_size - 1) >
 404             s->avctx->rc_max_rate * 0xFFFFLL) {
 405         av_log(avctx, AV_LOG_INFO,
 406                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 407                "specified vbv buffer is too large for the given bitrate!\n");
 408     }
 409
 410     if ((s->avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
 411         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 412         s->codec_id != AV_CODEC_ID_FLV1) {
 413         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 414         return -1;
 415     }
 416
 417     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 418         av_log(avctx, AV_LOG_ERROR,
 419                "OBMC is only supported with simple mb decision\n");
 420         return -1;
 421     }
 422
 423     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 424         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 425         return -1;
 426     }
 427
 428     if (s->max_b_frames                    &&
 429         s->codec_id != AV_CODEC_ID_MPEG4      &&
 430         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 431         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 432         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 433         return -1;
 434     }
 435
 436     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 437          s->codec_id == AV_CODEC_ID_H263  ||
 438          s->codec_id == AV_CODEC_ID_H263P) &&
 439         (avctx->sample_aspect_ratio.num > 255 ||
 440          avctx->sample_aspect_ratio.den > 255)) {
 441         av_log(avctx, AV_LOG_ERROR,
 442                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
 443                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 444         return -1;
 445     }
 446
 447     if ((s->avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
 448         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 449         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 450         return -1;
 451     }
 452
 453     // FIXME mpeg2 uses that too
 454     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
 455         av_log(avctx, AV_LOG_ERROR,
 456                "mpeg2 style quantization not supported by codec\n");
 457         return -1;
 458     }
 459
 460     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 461         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 462         return -1;
 463     }
 464
 465     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 466         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 467         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 468         return -1;
 469     }
 470
 471     if (s->avctx->scenechange_threshold < 1000000000 &&
 472         (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
 473         av_log(avctx, AV_LOG_ERROR,
 474                "closed gop with scene change detection are not supported yet, "
 475                "set threshold to 1000000000\n");
 476         return -1;
 477     }
 478
 479     if (s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
 480         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 481             av_log(avctx, AV_LOG_ERROR,
 482                   "low delay forcing is only available for mpeg2\n");
 483             return -1;
 484         }
 485         if (s->max_b_frames != 0) {
 486             av_log(avctx, AV_LOG_ERROR,
 487                    "b frames cannot be used with low delay\n");
 488             return -1;
 489         }
 490     }
 491
 492     if (s->q_scale_type == 1) {
 493         if (avctx->qmax > 12) {
 494             av_log(avctx, AV_LOG_ERROR,
 495                    "non linear quant only supports qmax <= 12 currently\n");
 496             return -1;
 497         }
 498     }
 499
 500     if (avctx->slices > 1 &&
 501         (avctx->codec_id == AV_CODEC_ID_FLV1 || avctx->codec_id == AV_CODEC_ID_H261)) {
 502         av_log(avctx, AV_LOG_ERROR, "Multiple slices are not supported by this codec\n");
 503         return AVERROR(EINVAL);
 504     }
 505
 506     if (s->avctx->thread_count > 1         &&
 507         s->codec_id != AV_CODEC_ID_MPEG4      &&
 508         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 509         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 510         (s->codec_id != AV_CODEC_ID_H263P)) {
 511         av_log(avctx, AV_LOG_ERROR,
 512                "multi threaded encoding not supported by codec\n");
 513         return -1;
 514     }
 515
 516     if (s->avctx->thread_count < 1) {
 517         av_log(avctx, AV_LOG_ERROR,
 518                "automatic thread number detection not supported by codec,"
 519                "patch welcome\n");
 520         return -1;
 521     }
 522
 523     if (!avctx->time_base.den || !avctx->time_base.num) {
 524         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 525         return -1;
 526     }
 527
 528 #if FF_API_PRIVATE_OPT
 529 FF_DISABLE_DEPRECATION_WARNINGS
 530     if (avctx->b_frame_strategy)
 531         s->b_frame_strategy = avctx->b_frame_strategy;
 532     if (avctx->b_sensitivity != 40)
 533         s->b_sensitivity = avctx->b_sensitivity;
 534 FF_ENABLE_DEPRECATION_WARNINGS
 535 #endif
 536
 537     if (s->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
 538         av_log(avctx, AV_LOG_INFO,
 539                "notice: b_frame_strategy only affects the first pass\n");
 540         s->b_frame_strategy = 0;
 541     }
 542
 543     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 544     if (i > 1) {
 545         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 546         avctx->time_base.den /= i;
 547         avctx->time_base.num /= i;
 548         //return -1;
 549     }
 550
 551     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 552         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
 553         // (a + x * 3 / 8) / x
 554         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 555         s->inter_quant_bias = 0;
 556     } else {
 557         s->intra_quant_bias = 0;
 558         // (a - x / 4) / x
 559         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 560     }
 561
 562 #if FF_API_QUANT_BIAS
 563 FF_DISABLE_DEPRECATION_WARNINGS
 564     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 565         s->intra_quant_bias = avctx->intra_quant_bias;
 566     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 567         s->inter_quant_bias = avctx->inter_quant_bias;
 568 FF_ENABLE_DEPRECATION_WARNINGS
 569 #endif
 570
 571     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 572         s->avctx->time_base.den > (1 << 16) - 1) {
 573         av_log(avctx, AV_LOG_ERROR,
 574                "timebase %d/%d not supported by MPEG 4 standard, "
 575                "the maximum admitted value for the timebase denominator "
 576                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 577                (1 << 16) - 1);
 578         return -1;
 579     }
 580     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 581
 582     switch (avctx->codec->id) {
 583     case AV_CODEC_ID_MPEG1VIDEO:
 584         s->out_format = FMT_MPEG1;
 585         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
 586         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 587         break;
 588     case AV_CODEC_ID_MPEG2VIDEO:
 589         s->out_format = FMT_MPEG1;
 590         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
 591         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 592         s->rtp_mode   = 1;
 593         break;
 594     case AV_CODEC_ID_MJPEG:
 595         s->out_format = FMT_MJPEG;
 596         s->intra_only = 1; /* force intra only for jpeg */
 597         if (!CONFIG_MJPEG_ENCODER ||
 598             ff_mjpeg_encode_init(s) < 0)
 599             return -1;
 600         avctx->delay = 0;
 601         s->low_delay = 1;
 602         break;
 603     case AV_CODEC_ID_H261:
 604         if (!CONFIG_H261_ENCODER)
 605             return -1;
 606         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 607             av_log(avctx, AV_LOG_ERROR,
 608                    "The specified picture size of %dx%d is not valid for the "
 609                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 610                     s->width, s->height);
 611             return -1;
 612         }
 613         s->out_format = FMT_H261;
 614         avctx->delay  = 0;
 615         s->low_delay  = 1;
 616         s->rtp_mode   = 0; /* Sliced encoding not supported */
 617         break;
 618     case AV_CODEC_ID_H263:
 619         if (!CONFIG_H263_ENCODER)
 620         return -1;
 621         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 622                              s->width, s->height) == 8) {
 623             av_log(avctx, AV_LOG_INFO,
 624                    "The specified picture size of %dx%d is not valid for "
 625                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 626                    "352x288, 704x576, and 1408x1152."
 627                    "Try H.263+.\n", s->width, s->height);
 628             return -1;
 629         }
 630         s->out_format = FMT_H263;
 631         avctx->delay  = 0;
 632         s->low_delay  = 1;
 633         break;
 634     case AV_CODEC_ID_H263P:
 635         s->out_format = FMT_H263;
 636         s->h263_plus  = 1;
 637         /* Fx */
 638         s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
 639         s->modified_quant  = s->h263_aic;
 640         s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 641         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 642
 643         /* /Fx */
 644         /* These are just to be sure */
 645         avctx->delay = 0;
 646         s->low_delay = 1;
 647         break;
 648     case AV_CODEC_ID_FLV1:
 649         s->out_format      = FMT_H263;
 650         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 651         s->unrestricted_mv = 1;
 652         s->rtp_mode  = 0; /* don't allow GOB */
 653         avctx->delay = 0;
 654         s->low_delay = 1;
 655         break;
 656     case AV_CODEC_ID_RV10:
 657         s->out_format = FMT_H263;
 658         avctx->delay  = 0;
 659         s->low_delay  = 1;
 660         break;
 661     case AV_CODEC_ID_RV20:
 662         s->out_format      = FMT_H263;
 663         avctx->delay       = 0;
 664         s->low_delay       = 1;
 665         s->modified_quant  = 1;
 666         s->h263_aic        = 1;
 667         s->h263_plus       = 1;
 668         s->loop_filter     = 1;
 669         s->unrestricted_mv = 0;
 670         break;
 671     case AV_CODEC_ID_MPEG4:
 672         s->out_format      = FMT_H263;
 673         s->h263_pred       = 1;
 674         s->unrestricted_mv = 1;
 675         s->low_delay       = s->max_b_frames ? 0 : 1;
 676         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 677         break;
 678     case AV_CODEC_ID_MSMPEG4V2:
 679         s->out_format      = FMT_H263;
 680         s->h263_pred       = 1;
 681         s->unrestricted_mv = 1;
 682         s->msmpeg4_version = 2;
 683         avctx->delay       = 0;
 684         s->low_delay       = 1;
 685         break;
 686     case AV_CODEC_ID_MSMPEG4V3:
 687         s->out_format        = FMT_H263;
 688         s->h263_pred         = 1;
 689         s->unrestricted_mv   = 1;
 690         s->msmpeg4_version   = 3;
 691         s->flipflop_rounding = 1;
 692         avctx->delay         = 0;
 693         s->low_delay         = 1;
 694         break;
 695     case AV_CODEC_ID_WMV1:
 696         s->out_format        = FMT_H263;
 697         s->h263_pred         = 1;
 698         s->unrestricted_mv   = 1;
 699         s->msmpeg4_version   = 4;
 700         s->flipflop_rounding = 1;
 701         avctx->delay         = 0;
 702         s->low_delay         = 1;
 703         break;
 704     case AV_CODEC_ID_WMV2:
 705         s->out_format        = FMT_H263;
 706         s->h263_pred         = 1;
 707         s->unrestricted_mv   = 1;
 708         s->msmpeg4_version   = 5;
 709         s->flipflop_rounding = 1;
 710         avctx->delay         = 0;
 711         s->low_delay         = 1;
 712         break;
 713     default:
 714         return -1;
 715     }
 716
 717     avctx->has_b_frames = !s->low_delay;
 718
 719     s->encoding = 1;
 720
 721     s->progressive_frame    =
 722     s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
 723                                                 AV_CODEC_FLAG_INTERLACED_ME) ||
 724                                 s->alternate_scan);
 725
 726     /* init */
 727     ff_mpv_idct_init(s);
 728     if (ff_mpv_common_init(s) < 0)
 729         return -1;
 730
 731     if (ARCH_X86)
 732         ff_mpv_encode_init_x86(s);
 733
 734     ff_fdctdsp_init(&s->fdsp, avctx);
 735     ff_me_cmp_init(&s->mecc, avctx);
 736     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 737     ff_pixblockdsp_init(&s->pdsp, avctx);
 738     ff_qpeldsp_init(&s->qdsp);
 739
 740     if (s->msmpeg4_version) {
 741         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 742                           2 * 2 * (MAX_LEVEL + 1) *
 743                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 744     }
 745     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 746
 747     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 748     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 749     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 750     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 751     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 752                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 753     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 754                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 755
 756     if (s->avctx->noise_reduction) {
 757         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 758                           2 * 64 * sizeof(uint16_t), fail);
 759     }
 760
 761     if (CONFIG_H263_ENCODER)
 762         ff_h263dsp_init(&s->h263dsp);
 763     if (!s->dct_quantize)
 764         s->dct_quantize = ff_dct_quantize_c;
 765     if (!s->denoise_dct)
 766         s->denoise_dct  = denoise_dct_c;
 767     s->fast_dct_quantize = s->dct_quantize;
 768     if (avctx->trellis)
 769         s->dct_quantize  = dct_quantize_trellis_c;
 770
 771     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 772         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 773
 774     if (s->slice_context_count > 1) {
 775         s->rtp_mode = 1;
 776
 777         if (avctx->codec_id == AV_CODEC_ID_H263 || avctx->codec_id == AV_CODEC_ID_H263P)
 778             s->h263_slice_structured = 1;
 779     }
 780
 781     s->quant_precision = 5;
 782
 783     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
 784     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
 785
 786     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 787         ff_h261_encode_init(s);
 788     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 789         ff_h263_encode_init(s);
 790     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 791         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
 792             return ret;
 793     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 794         && s->out_format == FMT_MPEG1)
 795         ff_mpeg1_encode_init(s);
 796
 797     /* init q matrix */
 798     for (i = 0; i < 64; i++) {
 799         int j = s->idsp.idct_permutation[i];
 800         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 801             s->mpeg_quant) {
 802             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 803             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 804         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 805             s->intra_matrix[j] =
 806             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 807         } else {
 808             /* mpeg1/2 */
 809             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 810             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 811         }
 812         if (s->avctx->intra_matrix)
 813             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 814         if (s->avctx->inter_matrix)
 815             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 816     }
 817
 818     /* precompute matrix */
 819     /* for mjpeg, we do include qscale in the matrix */
 820     if (s->out_format != FMT_MJPEG) {
 821         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
 822                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 823                           31, 1);
 824         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
 825                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 826                           31, 0);
 827     }
 828
 829     if (ff_rate_control_init(s) < 0)
 830         return -1;
 831
 832 #if FF_API_ERROR_RATE
 833     FF_DISABLE_DEPRECATION_WARNINGS
 834     if (avctx->error_rate)
 835         s->error_rate = avctx->error_rate;
 836     FF_ENABLE_DEPRECATION_WARNINGS;
 837 #endif
 838
 839 #if FF_API_NORMALIZE_AQP
 840     FF_DISABLE_DEPRECATION_WARNINGS
 841     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 842         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 843     FF_ENABLE_DEPRECATION_WARNINGS;
 844 #endif
 845
 846 #if FF_API_MV0
 847     FF_DISABLE_DEPRECATION_WARNINGS
 848     if (avctx->flags & CODEC_FLAG_MV0)
 849         s->mpv_flags |= FF_MPV_FLAG_MV0;
 850     FF_ENABLE_DEPRECATION_WARNINGS
 851 #endif
 852
 853 #if FF_API_MPV_OPT
 854     FF_DISABLE_DEPRECATION_WARNINGS
 855     if (avctx->rc_qsquish != 0.0)
 856         s->rc_qsquish = avctx->rc_qsquish;
 857     if (avctx->rc_qmod_amp != 0.0)
 858         s->rc_qmod_amp = avctx->rc_qmod_amp;
 859     if (avctx->rc_qmod_freq)
 860         s->rc_qmod_freq = avctx->rc_qmod_freq;
 861     if (avctx->rc_buffer_aggressivity != 1.0)
 862         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
 863     if (avctx->rc_initial_cplx != 0.0)
 864         s->rc_initial_cplx = avctx->rc_initial_cplx;
 865     if (avctx->lmin)
 866         s->lmin = avctx->lmin;
 867     if (avctx->lmax)
 868         s->lmax = avctx->lmax;
 869
 870     if (avctx->rc_eq) {
 871         av_freep(&s->rc_eq);
 872         s->rc_eq = av_strdup(avctx->rc_eq);
 873         if (!s->rc_eq)
 874             return AVERROR(ENOMEM);
 875     }
 876     FF_ENABLE_DEPRECATION_WARNINGS
 877 #endif
 878
 879     if (s->b_frame_strategy == 2) {
 880         for (i = 0; i < s->max_b_frames + 2; i++) {
 881             s->tmp_frames[i] = av_frame_alloc();
 882             if (!s->tmp_frames[i])
 883                 return AVERROR(ENOMEM);
 884
 885             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 886             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
 887             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
 888
 889             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 890             if (ret < 0)
 891                 return ret;
 892         }
 893     }
 894
 895     cpb_props = ff_add_cpb_side_data(avctx);
 896     if (!cpb_props)
 897         return AVERROR(ENOMEM);
 898     cpb_props->max_bitrate = avctx->rc_max_rate;
 899     cpb_props->min_bitrate = avctx->rc_min_rate;
 900     cpb_props->avg_bitrate = avctx->bit_rate;
 901     cpb_props->buffer_size = avctx->rc_buffer_size;
 902
 903     return 0;
 904 fail:
 905     ff_mpv_encode_end(avctx);
 906     return AVERROR_UNKNOWN;
 907 }
 908
 909 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
 910 {
 911     MpegEncContext *s = avctx->priv_data;
 912     int i;
 913
 914     ff_rate_control_uninit(s);
 915
 916     ff_mpv_common_end(s);
 917     if (CONFIG_MJPEG_ENCODER &&
 918         s->out_format == FMT_MJPEG)
 919         ff_mjpeg_encode_close(s);
 920
 921     av_freep(&avctx->extradata);
 922
 923     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
 924         av_frame_free(&s->tmp_frames[i]);
 925
 926     ff_free_picture_tables(&s->new_picture);
 927     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
 928
 929     av_freep(&s->avctx->stats_out);
 930     av_freep(&s->ac_stats);
 931
 932     av_freep(&s->q_intra_matrix);
 933     av_freep(&s->q_inter_matrix);
 934     av_freep(&s->q_intra_matrix16);
 935     av_freep(&s->q_inter_matrix16);
 936     av_freep(&s->input_picture);
 937     av_freep(&s->reordered_input_picture);
 938     av_freep(&s->dct_offset);
 939
 940     return 0;
 941 }
 942
 /**
  * Sum of absolute differences between a 16x16 pixel block and a constant.
  *
  * @param src    top-left pixel of the block
  * @param ref    value every pixel is compared against
  * @param stride offset between the starts of two consecutive rows of src
  * @return sum of |pixel - ref| over the 256 pixels of the block
  */
 static int get_sae(uint8_t *src, int ref, int stride)
 {
     int total = 0;
     int row, col;

     for (row = 0; row < 16; row++) {
         const uint8_t *line = src + row * stride;

         for (col = 0; col < 16; col++) {
             const int diff = line[col] - ref;

             total += diff >= 0 ? diff : -diff;
         }
     }

     return total;
 }
 956
 957 static int get_intra_count(MpegEncContext *s, uint8_t *src,
 958                            uint8_t *ref, int stride)
 959 {
 960     int x, y, w, h;
 961     int acc = 0;
 962
 963     w = s->width  & ~15;
 964     h = s->height & ~15;
 965
 966     for (y = 0; y < h; y += 16) {
 967         for (x = 0; x < w; x += 16) {
 968             int offset = x + y * stride;
 969             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
 970                                       stride, 16);
 971             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
 972             int sae  = get_sae(src + offset, mean, stride);
 973
 974             acc += sae + 500 < sad;
 975         }
 976     }
 977     return acc;
 978 }
 979
 980 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
 981 {
 982     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
 983                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
 984                             s->mb_stride, s->mb_height, s->b8_stride,
 985                             &s->linesize, &s->uvlinesize);
 986 }
 987
 988 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
 989 {
 990     Picture *pic = NULL;
 991     int64_t pts;
 992     int i, display_picture_number = 0, ret;
 993     int encoding_delay = s->max_b_frames ? s->max_b_frames
 994                                          : (s->low_delay ? 0 : 1);
 995     int flush_offset = 1;
 996     int direct = 1;
 997
 998     if (pic_arg) {
 999         pts = pic_arg->pts;
1000         display_picture_number = s->input_picture_number++;
1001
1002         if (pts != AV_NOPTS_VALUE) {
1003             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1004                 int64_t time = pts;
1005                 int64_t last = s->user_specified_pts;
1006
1007                 if (time <= last) {
1008                     av_log(s->avctx, AV_LOG_ERROR,
1009                            "Error, Invalid timestamp=%"PRId64", "
1010                            "last=%"PRId64"\n", pts, s->user_specified_pts);
1011                     return -1;
1012                 }
1013
1014                 if (!s->low_delay && display_picture_number == 1)
1015                     s->dts_delta = time - last;
1016             }
1017             s->user_specified_pts = pts;
1018         } else {
1019             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1020                 s->user_specified_pts =
1021                 pts = s->user_specified_pts + 1;
1022                 av_log(s->avctx, AV_LOG_INFO,
1023                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1024                        pts);
1025             } else {
1026                 pts = display_picture_number;
1027             }
1028         }
1029
1030         if (!pic_arg->buf[0] ||
1031             pic_arg->linesize[0] != s->linesize ||
1032             pic_arg->linesize[1] != s->uvlinesize ||
1033             pic_arg->linesize[2] != s->uvlinesize)
1034             direct = 0;
1035         if ((s->width & 15) || (s->height & 15))
1036             direct = 0;
1037
1038         ff_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
1039                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1040
1041         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1042         if (i < 0)
1043             return i;
1044
1045         pic = &s->picture[i];
1046         pic->reference = 3;
1047
1048         if (direct) {
1049             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1050                 return ret;
1051         }
1052         ret = alloc_picture(s, pic, direct);
1053         if (ret < 0)
1054             return ret;
1055
1056         if (!direct) {
1057             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1058                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1059                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1060                 // empty
1061             } else {
1062                 int h_chroma_shift, v_chroma_shift;
1063                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1064                                                  &h_chroma_shift,
1065                                                  &v_chroma_shift);
1066
1067                 for (i = 0; i < 3; i++) {
1068                     int src_stride = pic_arg->linesize[i];
1069                     int dst_stride = i ? s->uvlinesize : s->linesize;
1070                     int h_shift = i ? h_chroma_shift : 0;
1071                     int v_shift = i ? v_chroma_shift : 0;
1072                     int w = s->width  >> h_shift;
1073                     int h = s->height >> v_shift;
1074                     uint8_t *src = pic_arg->data[i];
1075                     uint8_t *dst = pic->f->data[i];
1076
1077                     if (!s->avctx->rc_buffer_size)
1078                         dst += INPLACE_OFFSET;
1079
1080                     if (src_stride == dst_stride)
1081                         memcpy(dst, src, src_stride * h);
1082                     else {
1083                         int h2 = h;
1084                         uint8_t *dst2 = dst;
1085                         while (h2--) {
1086                             memcpy(dst2, src, w);
1087                             dst2 += dst_stride;
1088                             src += src_stride;
1089                         }
1090                     }
1091                     if ((s->width & 15) || (s->height & 15)) {
1092                         s->mpvencdsp.draw_edges(dst, dst_stride,
1093                                                 w, h,
1094                                                 16 >> h_shift,
1095                                                 16 >> v_shift,
1096                                                 EDGE_BOTTOM);
1097                     }
1098                 }
1099             }
1100         }
1101         ret = av_frame_copy_props(pic->f, pic_arg);
1102         if (ret < 0)
1103             return ret;
1104
1105         pic->f->display_picture_number = display_picture_number;
1106         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1107     } else {
1108         /* Flushing: When we have not received enough input frames,
1109          * ensure s->input_picture[0] contains the first picture */
1110         for (flush_offset = 0; flush_offset < encoding_delay + 1; flush_offset++)
1111             if (s->input_picture[flush_offset])
1112                 break;
1113
1114         if (flush_offset <= 1)
1115             flush_offset = 1;
1116         else
1117             encoding_delay = encoding_delay - flush_offset + 1;
1118     }
1119
1120     /* shift buffer entries */
1121     for (i = flush_offset; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1122         s->input_picture[i - flush_offset] = s->input_picture[i];
1123
1124     s->input_picture[encoding_delay] = (Picture*) pic;
1125
1126     return 0;
1127 }
1128
1129 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1130 {
1131     int x, y, plane;
1132     int score = 0;
1133     int64_t score64 = 0;
1134
1135     for (plane = 0; plane < 3; plane++) {
1136         const int stride = p->f->linesize[plane];
1137         const int bw = plane ? 1 : 2;
1138         for (y = 0; y < s->mb_height * bw; y++) {
1139             for (x = 0; x < s->mb_width * bw; x++) {
1140                 int off = p->shared ? 0 : 16;
1141                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1142                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1143                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1144
1145                 switch (s->avctx->frame_skip_exp) {
1146                 case 0: score    =  FFMAX(score, v);          break;
1147                 case 1: score   += FFABS(v);                  break;
1148                 case 2: score   += v * v;                     break;
1149                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1150                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1151                 }
1152             }
1153         }
1154     }
1155
1156     if (score)
1157         score64 = score;
1158
1159     if (score64 < s->avctx->frame_skip_threshold)
1160         return 1;
1161     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1162         return 1;
1163     return 0;
1164 }
1165
1166 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1167 {
1168     AVPacket pkt = { 0 };
1169     int ret, got_output;
1170
1171     av_init_packet(&pkt);
1172     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1173     if (ret < 0)
1174         return ret;
1175
1176     ret = pkt.size;
1177     av_packet_unref(&pkt);
1178     return ret;
1179 }
1180
1181 static int estimate_best_b_count(MpegEncContext *s)
1182 {
1183     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1184     AVCodecContext *c = avcodec_alloc_context3(NULL);
1185     const int scale = s->avctx->brd_scale;
1186     int i, j, out_size, p_lambda, b_lambda, lambda2;
1187     int64_t best_rd  = INT64_MAX;
1188     int best_b_count = -1;
1189
1190     if (!c)
1191         return AVERROR(ENOMEM);
1192     assert(scale >= 0 && scale <= 3);
1193
1194     //emms_c();
1195     //s->next_picture_ptr->quality;
1196     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1197     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1198     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1199     if (!b_lambda) // FIXME we should do this somewhere else
1200         b_lambda = p_lambda;
1201     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1202                FF_LAMBDA_SHIFT;
1203
1204     c->width        = s->width  >> scale;
1205     c->height       = s->height >> scale;
1206     c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1207     c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1208     c->mb_decision  = s->avctx->mb_decision;
1209     c->me_cmp       = s->avctx->me_cmp;
1210     c->mb_cmp       = s->avctx->mb_cmp;
1211     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1212     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1213     c->time_base    = s->avctx->time_base;
1214     c->max_b_frames = s->max_b_frames;
1215
1216     if (avcodec_open2(c, codec, NULL) < 0)
1217         return -1;
1218
1219     for (i = 0; i < s->max_b_frames + 2; i++) {
1220         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1221                                                 s->next_picture_ptr;
1222
1223         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1224             pre_input = *pre_input_ptr;
1225
1226             if (!pre_input.shared && i) {
1227                 pre_input.f->data[0] += INPLACE_OFFSET;
1228                 pre_input.f->data[1] += INPLACE_OFFSET;
1229                 pre_input.f->data[2] += INPLACE_OFFSET;
1230             }
1231
1232             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1233                                        s->tmp_frames[i]->linesize[0],
1234                                        pre_input.f->data[0],
1235                                        pre_input.f->linesize[0],
1236                                        c->width, c->height);
1237             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1238                                        s->tmp_frames[i]->linesize[1],
1239                                        pre_input.f->data[1],
1240                                        pre_input.f->linesize[1],
1241                                        c->width >> 1, c->height >> 1);
1242             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1243                                        s->tmp_frames[i]->linesize[2],
1244                                        pre_input.f->data[2],
1245                                        pre_input.f->linesize[2],
1246                                        c->width >> 1, c->height >> 1);
1247         }
1248     }
1249
1250     for (j = 0; j < s->max_b_frames + 1; j++) {
1251         int64_t rd = 0;
1252
1253         if (!s->input_picture[j])
1254             break;
1255
1256         c->error[0] = c->error[1] = c->error[2] = 0;
1257
1258         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1259         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1260
1261         out_size = encode_frame(c, s->tmp_frames[0]);
1262
1263         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1264
1265         for (i = 0; i < s->max_b_frames + 1; i++) {
1266             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1267
1268             s->tmp_frames[i + 1]->pict_type = is_p ?
1269                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1270             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1271
1272             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1273
1274             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1275         }
1276
1277         /* get the delayed frames */
1278         while (out_size) {
1279             out_size = encode_frame(c, NULL);
1280             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1281         }
1282
1283         rd += c->error[0] + c->error[1] + c->error[2];
1284
1285         if (rd < best_rd) {
1286             best_rd = rd;
1287             best_b_count = j;
1288         }
1289     }
1290
1291     avcodec_close(c);
1292     av_freep(&c);
1293
1294     return best_b_count;
1295 }
1296
1297 static int select_input_picture(MpegEncContext *s)
1298 {
1299     int i, ret;
1300
1301     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1302         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1303     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1304
1305     /* set next picture type & ordering */
1306     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1307         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1308             !s->next_picture_ptr || s->intra_only) {
1309             s->reordered_input_picture[0] = s->input_picture[0];
1310             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1311             s->reordered_input_picture[0]->f->coded_picture_number =
1312                 s->coded_picture_number++;
1313         } else {
1314             int b_frames = 0;
1315
1316             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1317                 if (s->picture_in_gop_number < s->gop_size &&
1318                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1319                     // FIXME check that te gop check above is +-1 correct
1320                     av_frame_unref(s->input_picture[0]->f);
1321
1322                     emms_c();
1323                     ff_vbv_update(s, 0);
1324
1325                     goto no_output_pic;
1326                 }
1327             }
1328
1329             if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1330                 for (i = 0; i < s->max_b_frames + 1; i++) {
1331                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1332
1333                     if (pict_num >= s->rc_context.num_entries)
1334                         break;
1335                     if (!s->input_picture[i]) {
1336                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1337                         break;
1338                     }
1339
1340                     s->input_picture[i]->f->pict_type =
1341                         s->rc_context.entry[pict_num].new_pict_type;
1342                 }
1343             }
1344
1345             if (s->b_frame_strategy == 0) {
1346                 b_frames = s->max_b_frames;
1347                 while (b_frames && !s->input_picture[b_frames])
1348                     b_frames--;
1349             } else if (s->b_frame_strategy == 1) {
1350                 for (i = 1; i < s->max_b_frames + 1; i++) {
1351                     if (s->input_picture[i] &&
1352                         s->input_picture[i]->b_frame_score == 0) {
1353                         s->input_picture[i]->b_frame_score =
1354                             get_intra_count(s,
1355                                             s->input_picture[i    ]->f->data[0],
1356                                             s->input_picture[i - 1]->f->data[0],
1357                                             s->linesize) + 1;
1358                     }
1359                 }
1360                 for (i = 0; i < s->max_b_frames + 1; i++) {
1361                     if (!s->input_picture[i] ||
1362                         s->input_picture[i]->b_frame_score - 1 >
1363                             s->mb_num / s->b_sensitivity)
1364                         break;
1365                 }
1366
1367                 b_frames = FFMAX(0, i - 1);
1368
1369                 /* reset scores */
1370                 for (i = 0; i < b_frames + 1; i++) {
1371                     s->input_picture[i]->b_frame_score = 0;
1372                 }
1373             } else if (s->b_frame_strategy == 2) {
1374                 b_frames = estimate_best_b_count(s);
1375             }
1376
1377             emms_c();
1378
1379             for (i = b_frames - 1; i >= 0; i--) {
1380                 int type = s->input_picture[i]->f->pict_type;
1381                 if (type && type != AV_PICTURE_TYPE_B)
1382                     b_frames = i;
1383             }
1384             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1385                 b_frames == s->max_b_frames) {
1386                 av_log(s->avctx, AV_LOG_ERROR,
1387                        "warning, too many b frames in a row\n");
1388             }
1389
1390             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1391                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1392                     s->gop_size > s->picture_in_gop_number) {
1393                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1394                 } else {
1395                     if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1396                         b_frames = 0;
1397                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1398                 }
1399             }
1400
1401             if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1402                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1403                 b_frames--;
1404
1405             s->reordered_input_picture[0] = s->input_picture[b_frames];
1406             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1407                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1408             s->reordered_input_picture[0]->f->coded_picture_number =
1409                 s->coded_picture_number++;
1410             for (i = 0; i < b_frames; i++) {
1411                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1412                 s->reordered_input_picture[i + 1]->f->pict_type =
1413                     AV_PICTURE_TYPE_B;
1414                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1415                     s->coded_picture_number++;
1416             }
1417         }
1418     }
1419 no_output_pic:
1420     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1421
1422     if (s->reordered_input_picture[0]) {
1423         s->reordered_input_picture[0]->reference =
1424            s->reordered_input_picture[0]->f->pict_type !=
1425                AV_PICTURE_TYPE_B ? 3 : 0;
1426
1427         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1428             return ret;
1429
1430         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1431             // input is a shared pix, so we can't modifiy it -> alloc a new
1432             // one & ensure that the shared one is reuseable
1433
1434             Picture *pic;
1435             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1436             if (i < 0)
1437                 return i;
1438             pic = &s->picture[i];
1439
1440             pic->reference = s->reordered_input_picture[0]->reference;
1441             if (alloc_picture(s, pic, 0) < 0) {
1442                 return -1;
1443             }
1444
1445             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1446             if (ret < 0)
1447                 return ret;
1448
1449             /* mark us unused / free shared pic */
1450             av_frame_unref(s->reordered_input_picture[0]->f);
1451             s->reordered_input_picture[0]->shared = 0;
1452
1453             s->current_picture_ptr = pic;
1454         } else {
1455             // input is not a shared pix -> reuse buffer for current_pix
1456             s->current_picture_ptr = s->reordered_input_picture[0];
1457             for (i = 0; i < 4; i++) {
1458                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1459             }
1460         }
1461         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1462         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1463                                        s->current_picture_ptr)) < 0)
1464             return ret;
1465
1466         s->picture_number = s->new_picture.f->display_picture_number;
1467     }
1468     return 0;
1469 }
1470
1471 static void frame_end(MpegEncContext *s)
1472 {
1473     int i;
1474
1475     if (s->unrestricted_mv &&
1476         s->current_picture.reference &&
1477         !s->intra_only) {
1478         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1479         int hshift = desc->log2_chroma_w;
1480         int vshift = desc->log2_chroma_h;
1481         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1482                                 s->h_edge_pos, s->v_edge_pos,
1483                                 EDGE_WIDTH, EDGE_WIDTH,
1484                                 EDGE_TOP | EDGE_BOTTOM);
1485         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1486                                 s->h_edge_pos >> hshift,
1487                                 s->v_edge_pos >> vshift,
1488                                 EDGE_WIDTH >> hshift,
1489                                 EDGE_WIDTH >> vshift,
1490                                 EDGE_TOP | EDGE_BOTTOM);
1491         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1492                                 s->h_edge_pos >> hshift,
1493                                 s->v_edge_pos >> vshift,
1494                                 EDGE_WIDTH >> hshift,
1495                                 EDGE_WIDTH >> vshift,
1496                                 EDGE_TOP | EDGE_BOTTOM);
1497     }
1498
1499     emms_c();
1500
1501     s->last_pict_type                 = s->pict_type;
1502     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1503     if (s->pict_type!= AV_PICTURE_TYPE_B)
1504         s->last_non_b_pict_type = s->pict_type;
1505
1506     if (s->encoding) {
1507         /* release non-reference frames */
1508         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1509             if (!s->picture[i].reference)
1510                 ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1511         }
1512     }
1513
1514 #if FF_API_CODED_FRAME
1515 FF_DISABLE_DEPRECATION_WARNINGS
1516     av_frame_copy_props(s->avctx->coded_frame, s->current_picture.f);
1517 FF_ENABLE_DEPRECATION_WARNINGS
1518 #endif
1519 #if FF_API_ERROR_FRAME
1520 FF_DISABLE_DEPRECATION_WARNINGS
1521     memcpy(s->current_picture.f->error, s->current_picture.encoding_error,
1522            sizeof(s->current_picture.encoding_error));
1523 FF_ENABLE_DEPRECATION_WARNINGS
1524 #endif
1525 }
1526
1527 static void update_noise_reduction(MpegEncContext *s)
1528 {
1529     int intra, i;
1530
1531     for (intra = 0; intra < 2; intra++) {
1532         if (s->dct_count[intra] > (1 << 16)) {
1533             for (i = 0; i < 64; i++) {
1534                 s->dct_error_sum[intra][i] >>= 1;
1535             }
1536             s->dct_count[intra] >>= 1;
1537         }
1538
1539         for (i = 0; i < 64; i++) {
1540             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1541                                        s->dct_count[intra] +
1542                                        s->dct_error_sum[intra][i] / 2) /
1543                                       (s->dct_error_sum[intra][i] + 1);
1544         }
1545     }
1546 }
1547
1548 static int frame_start(MpegEncContext *s)
1549 {
1550     int ret;
1551
1552     /* mark & release old frames */
1553     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1554         s->last_picture_ptr != s->next_picture_ptr &&
1555         s->last_picture_ptr->f->buf[0]) {
1556         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1557     }
1558
1559     s->current_picture_ptr->f->pict_type = s->pict_type;
1560     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1561
1562     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1563     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1564                                    s->current_picture_ptr)) < 0)
1565         return ret;
1566
1567     if (s->pict_type != AV_PICTURE_TYPE_B) {
1568         s->last_picture_ptr = s->next_picture_ptr;
1569         if (!s->droppable)
1570             s->next_picture_ptr = s->current_picture_ptr;
1571     }
1572
1573     if (s->last_picture_ptr) {
1574         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1575         if (s->last_picture_ptr->f->buf[0] &&
1576             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1577                                        s->last_picture_ptr)) < 0)
1578             return ret;
1579     }
1580     if (s->next_picture_ptr) {
1581         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1582         if (s->next_picture_ptr->f->buf[0] &&
1583             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1584                                        s->next_picture_ptr)) < 0)
1585             return ret;
1586     }
1587
1588     if (s->picture_structure!= PICT_FRAME) {
1589         int i;
1590         for (i = 0; i < 4; i++) {
1591             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1592                 s->current_picture.f->data[i] +=
1593                     s->current_picture.f->linesize[i];
1594             }
1595             s->current_picture.f->linesize[i] *= 2;
1596             s->last_picture.f->linesize[i]    *= 2;
1597             s->next_picture.f->linesize[i]    *= 2;
1598         }
1599     }
1600
1601     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1602         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1603         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1604     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1605         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1606         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1607     } else {
1608         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1609         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1610     }
1611
1612     if (s->dct_error_sum) {
1613         assert(s->avctx->noise_reduction && s->encoding);
1614         update_noise_reduction(s);
1615     }
1616
1617     return 0;
1618 }
1619
1620 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1621                           const AVFrame *pic_arg, int *got_packet)
1622 {
1623     MpegEncContext *s = avctx->priv_data;
1624     int i, stuffing_count, ret;
1625     int context_count = s->slice_context_count;
1626
1627     s->picture_in_gop_number++;
1628
1629     if (load_input_picture(s, pic_arg) < 0)
1630         return -1;
1631
1632     if (select_input_picture(s) < 0) {
1633         return -1;
1634     }
1635
1636     /* output? */
1637     if (s->new_picture.f->data[0]) {
1638         uint8_t *sd;
1639         if (!pkt->data &&
1640             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1641             return ret;
1642         if (s->mb_info) {
1643             s->mb_info_ptr = av_packet_new_side_data(pkt,
1644                                  AV_PKT_DATA_H263_MB_INFO,
1645                                  s->mb_width*s->mb_height*12);
1646             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1647         }
1648
1649         for (i = 0; i < context_count; i++) {
1650             int start_y = s->thread_context[i]->start_mb_y;
1651             int   end_y = s->thread_context[i]->  end_mb_y;
1652             int h       = s->mb_height;
1653             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1654             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1655
1656             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1657         }
1658
1659         s->pict_type = s->new_picture.f->pict_type;
1660         //emms_c();
1661         ret = frame_start(s);
1662         if (ret < 0)
1663             return ret;
1664 vbv_retry:
1665         if (encode_picture(s, s->picture_number) < 0)
1666             return -1;
1667
1668 #if FF_API_STAT_BITS
1669 FF_DISABLE_DEPRECATION_WARNINGS
1670         avctx->header_bits = s->header_bits;
1671         avctx->mv_bits     = s->mv_bits;
1672         avctx->misc_bits   = s->misc_bits;
1673         avctx->i_tex_bits  = s->i_tex_bits;
1674         avctx->p_tex_bits  = s->p_tex_bits;
1675         avctx->i_count     = s->i_count;
1676         // FIXME f/b_count in avctx
1677         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1678         avctx->skip_count  = s->skip_count;
1679 FF_ENABLE_DEPRECATION_WARNINGS
1680 #endif
1681
1682         frame_end(s);
1683
1684         sd = av_packet_new_side_data(pkt, AV_PKT_DATA_QUALITY_FACTOR,
1685                                      sizeof(int));
1686         if (!sd)
1687             return AVERROR(ENOMEM);
1688         *(int *)sd = s->current_picture.f->quality;
1689
1690         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1691             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1692
1693         if (avctx->rc_buffer_size) {
1694             RateControlContext *rcc = &s->rc_context;
1695             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1696
1697             if (put_bits_count(&s->pb) > max_size &&
1698                 s->lambda < s->lmax) {
1699                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1700                                        (s->qscale + 1) / s->qscale);
1701                 if (s->adaptive_quant) {
1702                     int i;
1703                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1704                         s->lambda_table[i] =
1705                             FFMAX(s->lambda_table[i] + 1,
1706                                   s->lambda_table[i] * (s->qscale + 1) /
1707                                   s->qscale);
1708                 }
1709                 s->mb_skipped = 0;        // done in frame_start()
1710                 // done in encode_picture() so we must undo it
1711                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1712                     if (s->flipflop_rounding          ||
1713                         s->codec_id == AV_CODEC_ID_H263P ||
1714                         s->codec_id == AV_CODEC_ID_MPEG4)
1715                         s->no_rounding ^= 1;
1716                 }
1717                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1718                     s->time_base       = s->last_time_base;
1719                     s->last_non_b_time = s->time - s->pp_time;
1720                 }
1721                 for (i = 0; i < context_count; i++) {
1722                     PutBitContext *pb = &s->thread_context[i]->pb;
1723                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1724                 }
1725                 goto vbv_retry;
1726             }
1727
1728             assert(s->avctx->rc_max_rate);
1729         }
1730
1731         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1732             ff_write_pass1_stats(s);
1733
1734         for (i = 0; i < 4; i++) {
1735             s->current_picture_ptr->encoding_error[i] = s->current_picture.encoding_error[i];
1736             avctx->error[i] += s->current_picture_ptr->encoding_error[i];
1737         }
1738
1739         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1740             assert(put_bits_count(&s->pb) == s->header_bits + s->mv_bits +
1741                                              s->misc_bits + s->i_tex_bits +
1742                                              s->p_tex_bits);
1743         flush_put_bits(&s->pb);
1744         s->frame_bits  = put_bits_count(&s->pb);
1745
1746         stuffing_count = ff_vbv_update(s, s->frame_bits);
1747         if (stuffing_count) {
1748             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1749                     stuffing_count + 50) {
1750                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1751                 return -1;
1752             }
1753
1754             switch (s->codec_id) {
1755             case AV_CODEC_ID_MPEG1VIDEO:
1756             case AV_CODEC_ID_MPEG2VIDEO:
1757                 while (stuffing_count--) {
1758                     put_bits(&s->pb, 8, 0);
1759                 }
1760             break;
1761             case AV_CODEC_ID_MPEG4:
1762                 put_bits(&s->pb, 16, 0);
1763                 put_bits(&s->pb, 16, 0x1C3);
1764                 stuffing_count -= 4;
1765                 while (stuffing_count--) {
1766                     put_bits(&s->pb, 8, 0xFF);
1767                 }
1768             break;
1769             default:
1770                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1771             }
1772             flush_put_bits(&s->pb);
1773             s->frame_bits  = put_bits_count(&s->pb);
1774         }
1775
1776         /* update mpeg1/2 vbv_delay for CBR */
1777         if (s->avctx->rc_max_rate                          &&
1778             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1779             s->out_format == FMT_MPEG1                     &&
1780             90000LL * (avctx->rc_buffer_size - 1) <=
1781                 s->avctx->rc_max_rate * 0xFFFFLL) {
1782             AVCPBProperties *props;
1783             size_t props_size;
1784
1785             int vbv_delay, min_delay;
1786             double inbits  = s->avctx->rc_max_rate *
1787                              av_q2d(s->avctx->time_base);
1788             int    minbits = s->frame_bits - 8 *
1789                              (s->vbv_delay_ptr - s->pb.buf - 1);
1790             double bits    = s->rc_context.buffer_index + minbits - inbits;
1791
1792             if (bits < 0)
1793                 av_log(s->avctx, AV_LOG_ERROR,
1794                        "Internal error, negative bits\n");
1795
1796             assert(s->repeat_first_field == 0);
1797
1798             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1799             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1800                         s->avctx->rc_max_rate;
1801
1802             vbv_delay = FFMAX(vbv_delay, min_delay);
1803
1804             assert(vbv_delay < 0xFFFF);
1805
1806             s->vbv_delay_ptr[0] &= 0xF8;
1807             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1808             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1809             s->vbv_delay_ptr[2] &= 0x07;
1810             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1811
1812             props = av_cpb_properties_alloc(&props_size);
1813             if (!props)
1814                 return AVERROR(ENOMEM);
1815             props->vbv_delay = vbv_delay * 300;
1816
1817             ret = av_packet_add_side_data(pkt, AV_PKT_DATA_CPB_PROPERTIES,
1818                                           (uint8_t*)props, props_size);
1819             if (ret < 0) {
1820                 av_freep(&props);
1821                 return ret;
1822             }
1823
1824 #if FF_API_VBV_DELAY
1825 FF_DISABLE_DEPRECATION_WARNINGS
1826             avctx->vbv_delay     = vbv_delay * 300;
1827 FF_ENABLE_DEPRECATION_WARNINGS
1828 #endif
1829         }
1830         s->total_bits     += s->frame_bits;
1831 #if FF_API_STAT_BITS
1832 FF_DISABLE_DEPRECATION_WARNINGS
1833         avctx->frame_bits  = s->frame_bits;
1834 FF_ENABLE_DEPRECATION_WARNINGS
1835 #endif
1836
1837
1838         pkt->pts = s->current_picture.f->pts;
1839         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1840             if (!s->current_picture.f->coded_picture_number)
1841                 pkt->dts = pkt->pts - s->dts_delta;
1842             else
1843                 pkt->dts = s->reordered_pts;
1844             s->reordered_pts = pkt->pts;
1845         } else
1846             pkt->dts = pkt->pts;
1847         if (s->current_picture.f->key_frame)
1848             pkt->flags |= AV_PKT_FLAG_KEY;
1849         if (s->mb_info)
1850             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1851     } else {
1852         s->frame_bits = 0;
1853     }
1854     assert((s->frame_bits & 7) == 0);
1855
1856     pkt->size = s->frame_bits / 8;
1857     *got_packet = !!pkt->size;
1858     return 0;
1859 }
1860
1861 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1862                                                 int n, int threshold)
1863 {
1864     static const char tab[64] = {
1865         3, 2, 2, 1, 1, 1, 1, 1,
1866         1, 1, 1, 1, 1, 1, 1, 1,
1867         1, 1, 1, 1, 1, 1, 1, 1,
1868         0, 0, 0, 0, 0, 0, 0, 0,
1869         0, 0, 0, 0, 0, 0, 0, 0,
1870         0, 0, 0, 0, 0, 0, 0, 0,
1871         0, 0, 0, 0, 0, 0, 0, 0,
1872         0, 0, 0, 0, 0, 0, 0, 0
1873     };
1874     int score = 0;
1875     int run = 0;
1876     int i;
1877     int16_t *block = s->block[n];
1878     const int last_index = s->block_last_index[n];
1879     int skip_dc;
1880
1881     if (threshold < 0) {
1882         skip_dc = 0;
1883         threshold = -threshold;
1884     } else
1885         skip_dc = 1;
1886
1887     /* Are all we could set to zero already zero? */
1888     if (last_index <= skip_dc - 1)
1889         return;
1890
1891     for (i = 0; i <= last_index; i++) {
1892         const int j = s->intra_scantable.permutated[i];
1893         const int level = FFABS(block[j]);
1894         if (level == 1) {
1895             if (skip_dc && i == 0)
1896                 continue;
1897             score += tab[run];
1898             run = 0;
1899         } else if (level > 1) {
1900             return;
1901         } else {
1902             run++;
1903         }
1904     }
1905     if (score >= threshold)
1906         return;
1907     for (i = skip_dc; i <= last_index; i++) {
1908         const int j = s->intra_scantable.permutated[i];
1909         block[j] = 0;
1910     }
1911     if (block[0])
1912         s->block_last_index[n] = 0;
1913     else
1914         s->block_last_index[n] = -1;
1915 }
1916
1917 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1918                                int last_index)
1919 {
1920     int i;
1921     const int maxlevel = s->max_qcoeff;
1922     const int minlevel = s->min_qcoeff;
1923     int overflow = 0;
1924
1925     if (s->mb_intra) {
1926         i = 1; // skip clipping of intra dc
1927     } else
1928         i = 0;
1929
1930     for (; i <= last_index; i++) {
1931         const int j = s->intra_scantable.permutated[i];
1932         int level = block[j];
1933
1934         if (level > maxlevel) {
1935             level = maxlevel;
1936             overflow++;
1937         } else if (level < minlevel) {
1938             level = minlevel;
1939             overflow++;
1940         }
1941
1942         block[j] = level;
1943     }
1944
1945     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1946         av_log(s->avctx, AV_LOG_INFO,
1947                "warning, clipping %d dct coefficients to %d..%d\n",
1948                overflow, minlevel, maxlevel);
1949 }
1950
/**
 * Compute a per-pixel weight for an 8x8 block from the local variation of
 * the source pixels.
 *
 * For every pixel, the (up to) 3x3 neighbourhood clipped to the block is
 * examined and the weight is 36 * sqrt(count * sum(v^2) - sum(v)^2) / count,
 * where count is the number of neighbourhood pixels.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 source block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;
    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total   = 0;
            int squares = 0;
            int samples = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int pix = ptr[nx + ny * stride];

                    total   += pix;
                    squares += pix * pix;
                    samples++;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * squares - total * total)) / samples;
        }
    }
}
1974
1975 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1976                                                 int motion_x, int motion_y,
1977                                                 int mb_block_height,
1978                                                 int mb_block_count)
1979 {
1980     int16_t weight[8][64];
1981     int16_t orig[8][64];
1982     const int mb_x = s->mb_x;
1983     const int mb_y = s->mb_y;
1984     int i;
1985     int skip_dct[8];
1986     int dct_offset = s->linesize * 8; // default for progressive frames
1987     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1988     ptrdiff_t wrap_y, wrap_c;
1989
1990     for (i = 0; i < mb_block_count; i++)
1991         skip_dct[i] = s->skipdct;
1992
1993     if (s->adaptive_quant) {
1994         const int last_qp = s->qscale;
1995         const int mb_xy = mb_x + mb_y * s->mb_stride;
1996
1997         s->lambda = s->lambda_table[mb_xy];
1998         update_qscale(s);
1999
2000         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2001             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2002             s->dquant = s->qscale - last_qp;
2003
2004             if (s->out_format == FMT_H263) {
2005                 s->dquant = av_clip(s->dquant, -2, 2);
2006
2007                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2008                     if (!s->mb_intra) {
2009                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2010                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2011                                 s->dquant = 0;
2012                         }
2013                         if (s->mv_type == MV_TYPE_8X8)
2014                             s->dquant = 0;
2015                     }
2016                 }
2017             }
2018         }
2019         ff_set_qscale(s, last_qp + s->dquant);
2020     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2021         ff_set_qscale(s, s->qscale + s->dquant);
2022
2023     wrap_y = s->linesize;
2024     wrap_c = s->uvlinesize;
2025     ptr_y  = s->new_picture.f->data[0] +
2026              (mb_y * 16 * wrap_y)              + mb_x * 16;
2027     ptr_cb = s->new_picture.f->data[1] +
2028              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
2029     ptr_cr = s->new_picture.f->data[2] +
2030              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
2031
2032     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
2033         uint8_t *ebuf = s->sc.edge_emu_buffer + 32;
2034         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2035                                  wrap_y, wrap_y,
2036                                  16, 16, mb_x * 16, mb_y * 16,
2037                                  s->width, s->height);
2038         ptr_y = ebuf;
2039         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
2040                                  wrap_c, wrap_c,
2041                                  8, mb_block_height, mb_x * 8, mb_y * 8,
2042                                  s->width >> 1, s->height >> 1);
2043         ptr_cb = ebuf + 18 * wrap_y;
2044         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
2045                                  wrap_c, wrap_c,
2046                                  8, mb_block_height, mb_x * 8, mb_y * 8,
2047                                  s->width >> 1, s->height >> 1);
2048         ptr_cr = ebuf + 18 * wrap_y + 8;
2049     }
2050
2051     if (s->mb_intra) {
2052         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2053             int progressive_score, interlaced_score;
2054
2055             s->interlaced_dct = 0;
2056             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2057                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2058                                                      NULL, wrap_y, 8) - 400;
2059
2060             if (progressive_score > 0) {
2061                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2062                                                         NULL, wrap_y * 2, 8) +
2063                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2064                                                         NULL, wrap_y * 2, 8);
2065                 if (progressive_score > interlaced_score) {
2066                     s->interlaced_dct = 1;
2067
2068                     dct_offset = wrap_y;
2069                     wrap_y <<= 1;
2070                     if (s->chroma_format == CHROMA_422)
2071                         wrap_c <<= 1;
2072                 }
2073             }
2074         }
2075
2076         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2077         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2078         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2079         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2080
2081         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2082             skip_dct[4] = 1;
2083             skip_dct[5] = 1;
2084         } else {
2085             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2086             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2087             if (!s->chroma_y_shift) { /* 422 */
2088                 s->pdsp.get_pixels(s->block[6],
2089                                    ptr_cb + (dct_offset >> 1), wrap_c);
2090                 s->pdsp.get_pixels(s->block[7],
2091                                    ptr_cr + (dct_offset >> 1), wrap_c);
2092             }
2093         }
2094     } else {
2095         op_pixels_func (*op_pix)[4];
2096         qpel_mc_func (*op_qpix)[16];
2097         uint8_t *dest_y, *dest_cb, *dest_cr;
2098
2099         dest_y  = s->dest[0];
2100         dest_cb = s->dest[1];
2101         dest_cr = s->dest[2];
2102
2103         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2104             op_pix  = s->hdsp.put_pixels_tab;
2105             op_qpix = s->qdsp.put_qpel_pixels_tab;
2106         } else {
2107             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2108             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2109         }
2110
2111         if (s->mv_dir & MV_DIR_FORWARD) {
2112             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2113                           s->last_picture.f->data,
2114                           op_pix, op_qpix);
2115             op_pix  = s->hdsp.avg_pixels_tab;
2116             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2117         }
2118         if (s->mv_dir & MV_DIR_BACKWARD) {
2119             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2120                           s->next_picture.f->data,
2121                           op_pix, op_qpix);
2122         }
2123
2124         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2125             int progressive_score, interlaced_score;
2126
2127             s->interlaced_dct = 0;
2128             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2129                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2130                                                      ptr_y + wrap_y * 8,
2131                                                      wrap_y, 8) - 400;
2132
2133             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2134                 progressive_score -= 400;
2135
2136             if (progressive_score > 0) {
2137                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2138                                                         wrap_y * 2, 8) +
2139                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2140                                                         ptr_y + wrap_y,
2141                                                         wrap_y * 2, 8);
2142
2143                 if (progressive_score > interlaced_score) {
2144                     s->interlaced_dct = 1;
2145
2146                     dct_offset = wrap_y;
2147                     wrap_y <<= 1;
2148                     if (s->chroma_format == CHROMA_422)
2149                         wrap_c <<= 1;
2150                 }
2151             }
2152         }
2153
2154         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2155         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2156         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2157                             dest_y + dct_offset, wrap_y);
2158         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2159                             dest_y + dct_offset + 8, wrap_y);
2160
2161         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2162             skip_dct[4] = 1;
2163             skip_dct[5] = 1;
2164         } else {
2165             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2166             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2167             if (!s->chroma_y_shift) { /* 422 */
2168                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2169                                     dest_cb + (dct_offset >> 1), wrap_c);
2170                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2171                                     dest_cr + (dct_offset >> 1), wrap_c);
2172             }
2173         }
2174         /* pre quantization */
2175         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2176                 2 * s->qscale * s->qscale) {
2177             // FIXME optimize
2178             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2179                 skip_dct[0] = 1;
2180             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2181                 skip_dct[1] = 1;
2182             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2183                                wrap_y, 8) < 20 * s->qscale)
2184                 skip_dct[2] = 1;
2185             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2186                                wrap_y, 8) < 20 * s->qscale)
2187                 skip_dct[3] = 1;
2188             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2189                 skip_dct[4] = 1;
2190             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2191                 skip_dct[5] = 1;
2192             if (!s->chroma_y_shift) { /* 422 */
2193                 if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2194                                    dest_cb + (dct_offset >> 1),
2195                                    wrap_c, 8) < 20 * s->qscale)
2196                     skip_dct[6] = 1;
2197                 if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2198                                    dest_cr + (dct_offset >> 1),
2199                                    wrap_c, 8) < 20 * s->qscale)
2200                     skip_dct[7] = 1;
2201             }
2202         }
2203     }
2204
2205     if (s->quantizer_noise_shaping) {
2206         if (!skip_dct[0])
2207             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2208         if (!skip_dct[1])
2209             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2210         if (!skip_dct[2])
2211             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2212         if (!skip_dct[3])
2213             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2214         if (!skip_dct[4])
2215             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2216         if (!skip_dct[5])
2217             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2218         if (!s->chroma_y_shift) { /* 422 */
2219             if (!skip_dct[6])
2220                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2221                                   wrap_c);
2222             if (!skip_dct[7])
2223                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2224                                   wrap_c);
2225         }
2226         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2227     }
2228
2229     /* DCT & quantize */
2230     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2231     {
2232         for (i = 0; i < mb_block_count; i++) {
2233             if (!skip_dct[i]) {
2234                 int overflow;
2235                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2236                 // FIXME we could decide to change to quantizer instead of
2237                 // clipping
2238                 // JS: I don't think that would be a good idea it could lower
2239                 //     quality instead of improve it. Just INTRADC clipping
2240                 //     deserves changes in quantizer
2241                 if (overflow)
2242                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2243             } else
2244                 s->block_last_index[i] = -1;
2245         }
2246         if (s->quantizer_noise_shaping) {
2247             for (i = 0; i < mb_block_count; i++) {
2248                 if (!skip_dct[i]) {
2249                     s->block_last_index[i] =
2250                         dct_quantize_refine(s, s->block[i], weight[i],
2251                                             orig[i], i, s->qscale);
2252                 }
2253             }
2254         }
2255
2256         if (s->luma_elim_threshold && !s->mb_intra)
2257             for (i = 0; i < 4; i++)
2258                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2259         if (s->chroma_elim_threshold && !s->mb_intra)
2260             for (i = 4; i < mb_block_count; i++)
2261                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2262
2263         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2264             for (i = 0; i < mb_block_count; i++) {
2265                 if (s->block_last_index[i] == -1)
2266                     s->coded_score[i] = INT_MAX / 256;
2267             }
2268         }
2269     }
2270
2271     if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2272         s->block_last_index[4] =
2273         s->block_last_index[5] = 0;
2274         s->block[4][0] =
2275         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2276     }
2277
2278     // non c quantize code returns incorrect block_last_index FIXME
2279     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2280         for (i = 0; i < mb_block_count; i++) {
2281             int j;
2282             if (s->block_last_index[i] > 0) {
2283                 for (j = 63; j > 0; j--) {
2284                     if (s->block[i][s->intra_scantable.permutated[j]])
2285                         break;
2286                 }
2287                 s->block_last_index[i] = j;
2288             }
2289         }
2290     }
2291
2292     /* huffman encode */
2293     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2294     case AV_CODEC_ID_MPEG1VIDEO:
2295     case AV_CODEC_ID_MPEG2VIDEO:
2296         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2297             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2298         break;
2299     case AV_CODEC_ID_MPEG4:
2300         if (CONFIG_MPEG4_ENCODER)
2301             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2302         break;
2303     case AV_CODEC_ID_MSMPEG4V2:
2304     case AV_CODEC_ID_MSMPEG4V3:
2305     case AV_CODEC_ID_WMV1:
2306         if (CONFIG_MSMPEG4_ENCODER)
2307             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2308         break;
2309     case AV_CODEC_ID_WMV2:
2310         if (CONFIG_WMV2_ENCODER)
2311             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2312         break;
2313     case AV_CODEC_ID_H261:
2314         if (CONFIG_H261_ENCODER)
2315             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2316         break;
2317     case AV_CODEC_ID_H263:
2318     case AV_CODEC_ID_H263P:
2319     case AV_CODEC_ID_FLV1:
2320     case AV_CODEC_ID_RV10:
2321     case AV_CODEC_ID_RV20:
2322         if (CONFIG_H263_ENCODER)
2323             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2324         break;
2325     case AV_CODEC_ID_MJPEG:
2326         if (CONFIG_MJPEG_ENCODER)
2327             ff_mjpeg_encode_mb(s, s->block);
2328         break;
2329     default:
2330         assert(0);
2331     }
2332 }
2333
2334 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2335 {
2336     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2337     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2338 }
2339
2340 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2341     int i;
2342
2343     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2344
2345     /* mpeg1 */
2346     d->mb_skip_run= s->mb_skip_run;
2347     for(i=0; i<3; i++)
2348         d->last_dc[i] = s->last_dc[i];
2349
2350     /* statistics */
2351     d->mv_bits= s->mv_bits;
2352     d->i_tex_bits= s->i_tex_bits;
2353     d->p_tex_bits= s->p_tex_bits;
2354     d->i_count= s->i_count;
2355     d->f_count= s->f_count;
2356     d->b_count= s->b_count;
2357     d->skip_count= s->skip_count;
2358     d->misc_bits= s->misc_bits;
2359     d->last_bits= 0;
2360
2361     d->mb_skipped= 0;
2362     d->qscale= s->qscale;
2363     d->dquant= s->dquant;
2364
2365     d->esc3_level_length= s->esc3_level_length;
2366 }
2367
2368 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2369     int i;
2370
2371     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2372     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2373
2374     /* mpeg1 */
2375     d->mb_skip_run= s->mb_skip_run;
2376     for(i=0; i<3; i++)
2377         d->last_dc[i] = s->last_dc[i];
2378
2379     /* statistics */
2380     d->mv_bits= s->mv_bits;
2381     d->i_tex_bits= s->i_tex_bits;
2382     d->p_tex_bits= s->p_tex_bits;
2383     d->i_count= s->i_count;
2384     d->f_count= s->f_count;
2385     d->b_count= s->b_count;
2386     d->skip_count= s->skip_count;
2387     d->misc_bits= s->misc_bits;
2388
2389     d->mb_intra= s->mb_intra;
2390     d->mb_skipped= s->mb_skipped;
2391     d->mv_type= s->mv_type;
2392     d->mv_dir= s->mv_dir;
2393     d->pb= s->pb;
2394     if(s->data_partitioning){
2395         d->pb2= s->pb2;
2396         d->tex_pb= s->tex_pb;
2397     }
2398     d->block= s->block;
2399     for(i=0; i<8; i++)
2400         d->block_last_index[i]= s->block_last_index[i];
2401     d->interlaced_dct= s->interlaced_dct;
2402     d->qscale= s->qscale;
2403
2404     d->esc3_level_length= s->esc3_level_length;
2405 }
2406
2407 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2408                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2409                            int *dmin, int *next_block, int motion_x, int motion_y)
2410 {
2411     int score;
2412     uint8_t *dest_backup[3];
2413
2414     copy_context_before_encode(s, backup, type);
2415
2416     s->block= s->blocks[*next_block];
2417     s->pb= pb[*next_block];
2418     if(s->data_partitioning){
2419         s->pb2   = pb2   [*next_block];
2420         s->tex_pb= tex_pb[*next_block];
2421     }
2422
2423     if(*next_block){
2424         memcpy(dest_backup, s->dest, sizeof(s->dest));
2425         s->dest[0] = s->sc.rd_scratchpad;
2426         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2427         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2428         assert(s->linesize >= 32); //FIXME
2429     }
2430
2431     encode_mb(s, motion_x, motion_y);
2432
2433     score= put_bits_count(&s->pb);
2434     if(s->data_partitioning){
2435         score+= put_bits_count(&s->pb2);
2436         score+= put_bits_count(&s->tex_pb);
2437     }
2438
2439     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2440         ff_mpv_decode_mb(s, s->block);
2441
2442         score *= s->lambda2;
2443         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2444     }
2445
2446     if(*next_block){
2447         memcpy(s->dest, dest_backup, sizeof(s->dest));
2448     }
2449
2450     if(score<*dmin){
2451         *dmin= score;
2452         *next_block^=1;
2453
2454         copy_context_after_encode(best, s, type);
2455     }
2456 }
2457
2458 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2459     uint32_t *sq = ff_square_tab + 256;
2460     int acc=0;
2461     int x,y;
2462
2463     if(w==16 && h==16)
2464         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2465     else if(w==8 && h==8)
2466         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2467
2468     for(y=0; y<h; y++){
2469         for(x=0; x<w; x++){
2470             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2471         }
2472     }
2473
2474     assert(acc>=0);
2475
2476     return acc;
2477 }
2478
2479 static int sse_mb(MpegEncContext *s){
2480     int w= 16;
2481     int h= 16;
2482
2483     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2484     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2485
2486     if(w==16 && h==16)
2487       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2488         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2489                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2490                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2491       }else{
2492         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2493                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2494                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2495       }
2496     else
2497         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2498                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2499                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2500 }
2501
2502 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2503     MpegEncContext *s= *(void**)arg;
2504
2505
2506     s->me.pre_pass=1;
2507     s->me.dia_size= s->avctx->pre_dia_size;
2508     s->first_slice_line=1;
2509     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2510         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2511             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2512         }
2513         s->first_slice_line=0;
2514     }
2515
2516     s->me.pre_pass=0;
2517
2518     return 0;
2519 }
2520
2521 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2522     MpegEncContext *s= *(void**)arg;
2523
2524     s->me.dia_size= s->avctx->dia_size;
2525     s->first_slice_line=1;
2526     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2527         s->mb_x=0; //for block init below
2528         ff_init_block_index(s);
2529         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2530             s->block_index[0]+=2;
2531             s->block_index[1]+=2;
2532             s->block_index[2]+=2;
2533             s->block_index[3]+=2;
2534
2535             /* compute motion vector & mb_type and store in context */
2536             if(s->pict_type==AV_PICTURE_TYPE_B)
2537                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2538             else
2539                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2540         }
2541         s->first_slice_line=0;
2542     }
2543     return 0;
2544 }
2545
2546 static int mb_var_thread(AVCodecContext *c, void *arg){
2547     MpegEncContext *s= *(void**)arg;
2548     int mb_x, mb_y;
2549
2550     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2551         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2552             int xx = mb_x * 16;
2553             int yy = mb_y * 16;
2554             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2555             int varc;
2556             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2557
2558             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2559                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2560
2561             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2562             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2563             s->me.mb_var_sum_temp    += varc;
2564         }
2565     }
2566     return 0;
2567 }
2568
2569 static void write_slice_end(MpegEncContext *s){
2570     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2571         if(s->partitioned_frame){
2572             ff_mpeg4_merge_partitions(s);
2573         }
2574
2575         ff_mpeg4_stuffing(&s->pb);
2576     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2577         ff_mjpeg_encode_stuffing(&s->pb);
2578     }
2579
2580     avpriv_align_put_bits(&s->pb);
2581     flush_put_bits(&s->pb);
2582
2583     if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2584         s->misc_bits+= get_bits_diff(s);
2585 }
2586
2587 static void write_mb_info(MpegEncContext *s)
2588 {
2589     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2590     int offset = put_bits_count(&s->pb);
2591     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2592     int gobn = s->mb_y / s->gob_index;
2593     int pred_x, pred_y;
2594     if (CONFIG_H263_ENCODER)
2595         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2596     bytestream_put_le32(&ptr, offset);
2597     bytestream_put_byte(&ptr, s->qscale);
2598     bytestream_put_byte(&ptr, gobn);
2599     bytestream_put_le16(&ptr, mba);
2600     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2601     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2602     /* 4MV not implemented */
2603     bytestream_put_byte(&ptr, 0); /* hmv2 */
2604     bytestream_put_byte(&ptr, 0); /* vmv2 */
2605 }
2606
2607 static void update_mb_info(MpegEncContext *s, int startcode)
2608 {
2609     if (!s->mb_info)
2610         return;
2611     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2612         s->mb_info_size += 12;
2613         s->prev_mb_info = s->last_mb_info;
2614     }
2615     if (startcode) {
2616         s->prev_mb_info = put_bits_count(&s->pb)/8;
2617         /* This might have incremented mb_info_size above, and we return without
2618          * actually writing any info into that slot yet. But in that case,
2619          * this will be called again at the start of the after writing the
2620          * start code, actually writing the mb info. */
2621         return;
2622     }
2623
2624     s->last_mb_info = put_bits_count(&s->pb)/8;
2625     if (!s->mb_info_size)
2626         s->mb_info_size += 12;
2627     write_mb_info(s);
2628 }
2629
2630 static int encode_thread(AVCodecContext *c, void *arg){
2631     MpegEncContext *s= *(void**)arg;
2632     int mb_x, mb_y, pdif = 0;
2633     int chr_h= 16>>s->chroma_y_shift;
2634     int i, j;
2635     MpegEncContext best_s = { 0 }, backup_s;
2636     uint8_t bit_buf[2][MAX_MB_BYTES];
2637     uint8_t bit_buf2[2][MAX_MB_BYTES];
2638     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2639     PutBitContext pb[2], pb2[2], tex_pb[2];
2640
2641     for(i=0; i<2; i++){
2642         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2643         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2644         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2645     }
2646
2647     s->last_bits= put_bits_count(&s->pb);
2648     s->mv_bits=0;
2649     s->misc_bits=0;
2650     s->i_tex_bits=0;
2651     s->p_tex_bits=0;
2652     s->i_count=0;
2653     s->f_count=0;
2654     s->b_count=0;
2655     s->skip_count=0;
2656
2657     for(i=0; i<3; i++){
2658         /* init last dc values */
2659         /* note: quant matrix value (8) is implied here */
2660         s->last_dc[i] = 128 << s->intra_dc_precision;
2661
2662         s->current_picture.encoding_error[i] = 0;
2663     }
2664     s->mb_skip_run = 0;
2665     memset(s->last_mv, 0, sizeof(s->last_mv));
2666
2667     s->last_mv_dir = 0;
2668
2669     switch(s->codec_id){
2670     case AV_CODEC_ID_H263:
2671     case AV_CODEC_ID_H263P:
2672     case AV_CODEC_ID_FLV1:
2673         if (CONFIG_H263_ENCODER)
2674             s->gob_index = H263_GOB_HEIGHT(s->height);
2675         break;
2676     case AV_CODEC_ID_MPEG4:
2677         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2678             ff_mpeg4_init_partitions(s);
2679         break;
2680     }
2681
2682     s->resync_mb_x=0;
2683     s->resync_mb_y=0;
2684     s->first_slice_line = 1;
2685     s->ptr_lastgob = s->pb.buf;
2686     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2687         s->mb_x=0;
2688         s->mb_y= mb_y;
2689
2690         ff_set_qscale(s, s->qscale);
2691         ff_init_block_index(s);
2692
2693         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2694             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2695             int mb_type= s->mb_type[xy];
2696 //            int d;
2697             int dmin= INT_MAX;
2698             int dir;
2699
2700             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2701                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2702                 return -1;
2703             }
2704             if(s->data_partitioning){
2705                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2706                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2707                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2708                     return -1;
2709                 }
2710             }
2711
2712             s->mb_x = mb_x;
2713             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2714             ff_update_block_index(s);
2715
2716             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2717                 ff_h261_reorder_mb_index(s);
2718                 xy= s->mb_y*s->mb_stride + s->mb_x;
2719                 mb_type= s->mb_type[xy];
2720             }
2721
2722             /* write gob / video packet header  */
2723             if(s->rtp_mode){
2724                 int current_packet_size, is_gob_start;
2725
2726                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2727
2728                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2729
2730                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2731
2732                 switch(s->codec_id){
2733                 case AV_CODEC_ID_H263:
2734                 case AV_CODEC_ID_H263P:
2735                     if(!s->h263_slice_structured)
2736                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2737                     break;
2738                 case AV_CODEC_ID_MPEG2VIDEO:
2739                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2740                 case AV_CODEC_ID_MPEG1VIDEO:
2741                     if(s->mb_skip_run) is_gob_start=0;
2742                     break;
2743                 }
2744
2745                 if(is_gob_start){
2746                     if(s->start_mb_y != mb_y || mb_x!=0){
2747                         write_slice_end(s);
2748
2749                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2750                             ff_mpeg4_init_partitions(s);
2751                         }
2752                     }
2753
2754                     assert((put_bits_count(&s->pb)&7) == 0);
2755                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2756
2757                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2758                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2759                         int d = 100 / s->error_rate;
2760                         if(r % d == 0){
2761                             current_packet_size=0;
2762                             s->pb.buf_ptr= s->ptr_lastgob;
2763                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2764                         }
2765                     }
2766
2767 #if FF_API_RTP_CALLBACK
2768 FF_DISABLE_DEPRECATION_WARNINGS
2769                     if (s->avctx->rtp_callback){
2770                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2771                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2772                     }
2773 FF_ENABLE_DEPRECATION_WARNINGS
2774 #endif
2775                     update_mb_info(s, 1);
2776
2777                     switch(s->codec_id){
2778                     case AV_CODEC_ID_MPEG4:
2779                         if (CONFIG_MPEG4_ENCODER) {
2780                             ff_mpeg4_encode_video_packet_header(s);
2781                             ff_mpeg4_clean_buffers(s);
2782                         }
2783                     break;
2784                     case AV_CODEC_ID_MPEG1VIDEO:
2785                     case AV_CODEC_ID_MPEG2VIDEO:
2786                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2787                             ff_mpeg1_encode_slice_header(s);
2788                             ff_mpeg1_clean_buffers(s);
2789                         }
2790                     break;
2791                     case AV_CODEC_ID_H263:
2792                     case AV_CODEC_ID_H263P:
2793                         if (CONFIG_H263_ENCODER)
2794                             ff_h263_encode_gob_header(s, mb_y);
2795                     break;
2796                     }
2797
2798                     if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
2799                         int bits= put_bits_count(&s->pb);
2800                         s->misc_bits+= bits - s->last_bits;
2801                         s->last_bits= bits;
2802                     }
2803
2804                     s->ptr_lastgob += current_packet_size;
2805                     s->first_slice_line=1;
2806                     s->resync_mb_x=mb_x;
2807                     s->resync_mb_y=mb_y;
2808                 }
2809             }
2810
2811             if(  (s->resync_mb_x   == s->mb_x)
2812                && s->resync_mb_y+1 == s->mb_y){
2813                 s->first_slice_line=0;
2814             }
2815
2816             s->mb_skipped=0;
2817             s->dquant=0; //only for QP_RD
2818
2819             update_mb_info(s, 0);
2820
2821             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2822                 int next_block=0;
2823                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2824
2825                 copy_context_before_encode(&backup_s, s, -1);
2826                 backup_s.pb= s->pb;
2827                 best_s.data_partitioning= s->data_partitioning;
2828                 best_s.partitioned_frame= s->partitioned_frame;
2829                 if(s->data_partitioning){
2830                     backup_s.pb2= s->pb2;
2831                     backup_s.tex_pb= s->tex_pb;
2832                 }
2833
2834                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2835                     s->mv_dir = MV_DIR_FORWARD;
2836                     s->mv_type = MV_TYPE_16X16;
2837                     s->mb_intra= 0;
2838                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2839                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2840                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2841                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2842                 }
2843                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2844                     s->mv_dir = MV_DIR_FORWARD;
2845                     s->mv_type = MV_TYPE_FIELD;
2846                     s->mb_intra= 0;
2847                     for(i=0; i<2; i++){
2848                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2849                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2850                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2851                     }
2852                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2853                                  &dmin, &next_block, 0, 0);
2854                 }
2855                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2856                     s->mv_dir = MV_DIR_FORWARD;
2857                     s->mv_type = MV_TYPE_16X16;
2858                     s->mb_intra= 0;
2859                     s->mv[0][0][0] = 0;
2860                     s->mv[0][0][1] = 0;
2861                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2862                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2863                 }
2864                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2865                     s->mv_dir = MV_DIR_FORWARD;
2866                     s->mv_type = MV_TYPE_8X8;
2867                     s->mb_intra= 0;
2868                     for(i=0; i<4; i++){
2869                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2870                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2871                     }
2872                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2873                                  &dmin, &next_block, 0, 0);
2874                 }
2875                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2876                     s->mv_dir = MV_DIR_FORWARD;
2877                     s->mv_type = MV_TYPE_16X16;
2878                     s->mb_intra= 0;
2879                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2880                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2881                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2882                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2883                 }
2884                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2885                     s->mv_dir = MV_DIR_BACKWARD;
2886                     s->mv_type = MV_TYPE_16X16;
2887                     s->mb_intra= 0;
2888                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2889                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2890                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2891                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2892                 }
2893                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2894                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2895                     s->mv_type = MV_TYPE_16X16;
2896                     s->mb_intra= 0;
2897                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2898                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2899                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2900                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2901                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2902                                  &dmin, &next_block, 0, 0);
2903                 }
2904                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2905                     s->mv_dir = MV_DIR_FORWARD;
2906                     s->mv_type = MV_TYPE_FIELD;
2907                     s->mb_intra= 0;
2908                     for(i=0; i<2; i++){
2909                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2910                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2911                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2912                     }
2913                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2914                                  &dmin, &next_block, 0, 0);
2915                 }
2916                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2917                     s->mv_dir = MV_DIR_BACKWARD;
2918                     s->mv_type = MV_TYPE_FIELD;
2919                     s->mb_intra= 0;
2920                     for(i=0; i<2; i++){
2921                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2922                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2923                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2924                     }
2925                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2926                                  &dmin, &next_block, 0, 0);
2927                 }
2928                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2929                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2930                     s->mv_type = MV_TYPE_FIELD;
2931                     s->mb_intra= 0;
2932                     for(dir=0; dir<2; dir++){
2933                         for(i=0; i<2; i++){
2934                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2935                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2936                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2937                         }
2938                     }
2939                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2940                                  &dmin, &next_block, 0, 0);
2941                 }
2942                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2943                     s->mv_dir = 0;
2944                     s->mv_type = MV_TYPE_16X16;
2945                     s->mb_intra= 1;
2946                     s->mv[0][0][0] = 0;
2947                     s->mv[0][0][1] = 0;
2948                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2949                                  &dmin, &next_block, 0, 0);
2950                     if(s->h263_pred || s->h263_aic){
2951                         if(best_s.mb_intra)
2952                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2953                         else
2954                             ff_clean_intra_table_entries(s); //old mode?
2955                     }
2956                 }
2957
2958                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2959                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2960                         const int last_qp= backup_s.qscale;
2961                         int qpi, qp, dc[6];
2962                         int16_t ac[6][16];
2963                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2964                         static const int dquant_tab[4]={-1,1,-2,2};
2965
2966                         assert(backup_s.dquant == 0);
2967
2968                         //FIXME intra
2969                         s->mv_dir= best_s.mv_dir;
2970                         s->mv_type = MV_TYPE_16X16;
2971                         s->mb_intra= best_s.mb_intra;
2972                         s->mv[0][0][0] = best_s.mv[0][0][0];
2973                         s->mv[0][0][1] = best_s.mv[0][0][1];
2974                         s->mv[1][0][0] = best_s.mv[1][0][0];
2975                         s->mv[1][0][1] = best_s.mv[1][0][1];
2976
2977                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2978                         for(; qpi<4; qpi++){
2979                             int dquant= dquant_tab[qpi];
2980                             qp= last_qp + dquant;
2981                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2982                                 continue;
2983                             backup_s.dquant= dquant;
2984                             if(s->mb_intra && s->dc_val[0]){
2985                                 for(i=0; i<6; i++){
2986                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2987                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2988                                 }
2989                             }
2990
2991                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2992                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2993                             if(best_s.qscale != qp){
2994                                 if(s->mb_intra && s->dc_val[0]){
2995                                     for(i=0; i<6; i++){
2996                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2997                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2998                                     }
2999                                 }
3000                             }
3001                         }
3002                     }
3003                 }
3004                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3005                     int mx= s->b_direct_mv_table[xy][0];
3006                     int my= s->b_direct_mv_table[xy][1];
3007
3008                     backup_s.dquant = 0;
3009                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3010                     s->mb_intra= 0;
3011                     ff_mpeg4_set_direct_mv(s, mx, my);
3012                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3013                                  &dmin, &next_block, mx, my);
3014                 }
3015                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3016                     backup_s.dquant = 0;
3017                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3018                     s->mb_intra= 0;
3019                     ff_mpeg4_set_direct_mv(s, 0, 0);
3020                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3021                                  &dmin, &next_block, 0, 0);
3022                 }
3023                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3024                     int coded=0;
3025                     for(i=0; i<6; i++)
3026                         coded |= s->block_last_index[i];
3027                     if(coded){
3028                         int mx,my;
3029                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3030                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3031                             mx=my=0; //FIXME find the one we actually used
3032                             ff_mpeg4_set_direct_mv(s, mx, my);
3033                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3034                             mx= s->mv[1][0][0];
3035                             my= s->mv[1][0][1];
3036                         }else{
3037                             mx= s->mv[0][0][0];
3038                             my= s->mv[0][0][1];
3039                         }
3040
3041                         s->mv_dir= best_s.mv_dir;
3042                         s->mv_type = best_s.mv_type;
3043                         s->mb_intra= 0;
3044 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3045                         s->mv[0][0][1] = best_s.mv[0][0][1];
3046                         s->mv[1][0][0] = best_s.mv[1][0][0];
3047                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3048                         backup_s.dquant= 0;
3049                         s->skipdct=1;
3050                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3051                                         &dmin, &next_block, mx, my);
3052                         s->skipdct=0;
3053                     }
3054                 }
3055
3056                 s->current_picture.qscale_table[xy] = best_s.qscale;
3057
3058                 copy_context_after_encode(s, &best_s, -1);
3059
3060                 pb_bits_count= put_bits_count(&s->pb);
3061                 flush_put_bits(&s->pb);
3062                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3063                 s->pb= backup_s.pb;
3064
3065                 if(s->data_partitioning){
3066                     pb2_bits_count= put_bits_count(&s->pb2);
3067                     flush_put_bits(&s->pb2);
3068                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3069                     s->pb2= backup_s.pb2;
3070
3071                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3072                     flush_put_bits(&s->tex_pb);
3073                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3074                     s->tex_pb= backup_s.tex_pb;
3075                 }
3076                 s->last_bits= put_bits_count(&s->pb);
3077
3078                 if (CONFIG_H263_ENCODER &&
3079                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3080                     ff_h263_update_motion_val(s);
3081
3082                 if(next_block==0){ //FIXME 16 vs linesize16
3083                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3084                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3085                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3086                 }
3087
3088                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3089                     ff_mpv_decode_mb(s, s->block);
3090             } else {
3091                 int motion_x = 0, motion_y = 0;
3092                 s->mv_type=MV_TYPE_16X16;
3093                 // only one MB-Type possible
3094
3095                 switch(mb_type){
3096                 case CANDIDATE_MB_TYPE_INTRA:
3097                     s->mv_dir = 0;
3098                     s->mb_intra= 1;
3099                     motion_x= s->mv[0][0][0] = 0;
3100                     motion_y= s->mv[0][0][1] = 0;
3101                     break;
3102                 case CANDIDATE_MB_TYPE_INTER:
3103                     s->mv_dir = MV_DIR_FORWARD;
3104                     s->mb_intra= 0;
3105                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3106                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3107                     break;
3108                 case CANDIDATE_MB_TYPE_INTER_I:
3109                     s->mv_dir = MV_DIR_FORWARD;
3110                     s->mv_type = MV_TYPE_FIELD;
3111                     s->mb_intra= 0;
3112                     for(i=0; i<2; i++){
3113                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3114                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3115                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3116                     }
3117                     break;
3118                 case CANDIDATE_MB_TYPE_INTER4V:
3119                     s->mv_dir = MV_DIR_FORWARD;
3120                     s->mv_type = MV_TYPE_8X8;
3121                     s->mb_intra= 0;
3122                     for(i=0; i<4; i++){
3123                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3124                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3125                     }
3126                     break;
3127                 case CANDIDATE_MB_TYPE_DIRECT:
3128                     if (CONFIG_MPEG4_ENCODER) {
3129                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3130                         s->mb_intra= 0;
3131                         motion_x=s->b_direct_mv_table[xy][0];
3132                         motion_y=s->b_direct_mv_table[xy][1];
3133                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3134                     }
3135                     break;
3136                 case CANDIDATE_MB_TYPE_DIRECT0:
3137                     if (CONFIG_MPEG4_ENCODER) {
3138                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3139                         s->mb_intra= 0;
3140                         ff_mpeg4_set_direct_mv(s, 0, 0);
3141                     }
3142                     break;
3143                 case CANDIDATE_MB_TYPE_BIDIR:
3144                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3145                     s->mb_intra= 0;
3146                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3147                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3148                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3149                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3150                     break;
3151                 case CANDIDATE_MB_TYPE_BACKWARD:
3152                     s->mv_dir = MV_DIR_BACKWARD;
3153                     s->mb_intra= 0;
3154                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3155                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3156                     break;
3157                 case CANDIDATE_MB_TYPE_FORWARD:
3158                     s->mv_dir = MV_DIR_FORWARD;
3159                     s->mb_intra= 0;
3160                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3161                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3162                     break;
3163                 case CANDIDATE_MB_TYPE_FORWARD_I:
3164                     s->mv_dir = MV_DIR_FORWARD;
3165                     s->mv_type = MV_TYPE_FIELD;
3166                     s->mb_intra= 0;
3167                     for(i=0; i<2; i++){
3168                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3169                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3170                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3171                     }
3172                     break;
3173                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3174                     s->mv_dir = MV_DIR_BACKWARD;
3175                     s->mv_type = MV_TYPE_FIELD;
3176                     s->mb_intra= 0;
3177                     for(i=0; i<2; i++){
3178                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3179                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3180                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3181                     }
3182                     break;
3183                 case CANDIDATE_MB_TYPE_BIDIR_I:
3184                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3185                     s->mv_type = MV_TYPE_FIELD;
3186                     s->mb_intra= 0;
3187                     for(dir=0; dir<2; dir++){
3188                         for(i=0; i<2; i++){
3189                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3190                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3191                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3192                         }
3193                     }
3194                     break;
3195                 default:
3196                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3197                 }
3198
3199                 encode_mb(s, motion_x, motion_y);
3200
3201                 // RAL: Update last macroblock type
3202                 s->last_mv_dir = s->mv_dir;
3203
3204                 if (CONFIG_H263_ENCODER &&
3205                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3206                     ff_h263_update_motion_val(s);
3207
3208                 ff_mpv_decode_mb(s, s->block);
3209             }
3210
3211             /* clean the MV table in IPS frames for direct mode in B frames */
3212             if(s->mb_intra /* && I,P,S_TYPE */){
3213                 s->p_mv_table[xy][0]=0;
3214                 s->p_mv_table[xy][1]=0;
3215             }
3216
3217             if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3218                 int w= 16;
3219                 int h= 16;
3220
3221                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3222                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3223
3224                 s->current_picture.encoding_error[0] += sse(
3225                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3226                     s->dest[0], w, h, s->linesize);
3227                 s->current_picture.encoding_error[1] += sse(
3228                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3229                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3230                 s->current_picture.encoding_error[2] += sse(
3231                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3232                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3233             }
3234             if(s->loop_filter){
3235                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3236                     ff_h263_loop_filter(s);
3237             }
3238             ff_dlog(s->avctx, "MB %d %d bits\n",
3239                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3240         }
3241     }
3242
3243     //not beautiful here but we must write it before flushing so it has to be here
3244     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3245         ff_msmpeg4_encode_ext_header(s);
3246
3247     write_slice_end(s);
3248
3249 #if FF_API_RTP_CALLBACK
3250 FF_DISABLE_DEPRECATION_WARNINGS
3251     /* Send the last GOB if RTP */
3252     if (s->avctx->rtp_callback) {
3253         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3254         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3255         /* Call the RTP callback to send the last GOB */
3256         emms_c();
3257         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3258     }
3259 FF_ENABLE_DEPRECATION_WARNINGS
3260 #endif
3261
3262     return 0;
3263 }
3264
/*
 * Fold one field of a source context into a destination context:
 * dst->field += src->field, then src->field is reset to 0.
 * Relies on pointers named `dst` and `src` being in scope where it is used.
 *
 * The expansion is wrapped in do { } while (0) so that both statements stay
 * together when the macro is the sole body of an unbraced if/else/for.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3266 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3267     MERGE(me.scene_change_score);
3268     MERGE(me.mc_mb_var_sum_temp);
3269     MERGE(me.mb_var_sum_temp);
3270 }
3271
3272 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3273     int i;
3274
3275     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3276     MERGE(dct_count[1]);
3277     MERGE(mv_bits);
3278     MERGE(i_tex_bits);
3279     MERGE(p_tex_bits);
3280     MERGE(i_count);
3281     MERGE(f_count);
3282     MERGE(b_count);
3283     MERGE(skip_count);
3284     MERGE(misc_bits);
3285     MERGE(er.error_count);
3286     MERGE(padding_bug_score);
3287     MERGE(current_picture.encoding_error[0]);
3288     MERGE(current_picture.encoding_error[1]);
3289     MERGE(current_picture.encoding_error[2]);
3290
3291     if(dst->avctx->noise_reduction){
3292         for(i=0; i<64; i++){
3293             MERGE(dct_error_sum[0][i]);
3294             MERGE(dct_error_sum[1][i]);
3295         }
3296     }
3297
3298     assert(put_bits_count(&src->pb) % 8 ==0);
3299     assert(put_bits_count(&dst->pb) % 8 ==0);
3300     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3301     flush_put_bits(&dst->pb);
3302 }
3303
3304 static int estimate_qp(MpegEncContext *s, int dry_run){
3305     if (s->next_lambda){
3306         s->current_picture_ptr->f->quality =
3307         s->current_picture.f->quality = s->next_lambda;
3308         if(!dry_run) s->next_lambda= 0;
3309     } else if (!s->fixed_qscale) {
3310         s->current_picture_ptr->f->quality =
3311         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3312         if (s->current_picture.f->quality < 0)
3313             return -1;
3314     }
3315
3316     if(s->adaptive_quant){
3317         switch(s->codec_id){
3318         case AV_CODEC_ID_MPEG4:
3319             if (CONFIG_MPEG4_ENCODER)
3320                 ff_clean_mpeg4_qscales(s);
3321             break;
3322         case AV_CODEC_ID_H263:
3323         case AV_CODEC_ID_H263P:
3324         case AV_CODEC_ID_FLV1:
3325             if (CONFIG_H263_ENCODER)
3326                 ff_clean_h263_qscales(s);
3327             break;
3328         default:
3329             ff_init_qscale_tab(s);
3330         }
3331
3332         s->lambda= s->lambda_table[0];
3333         //FIXME broken
3334     }else
3335         s->lambda = s->current_picture.f->quality;
3336     update_qscale(s);
3337     return 0;
3338 }
3339
3340 /* must be called before writing the header */
3341 static void set_frame_distances(MpegEncContext * s){
3342     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3343     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3344
3345     if(s->pict_type==AV_PICTURE_TYPE_B){
3346         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3347         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3348     }else{
3349         s->pp_time= s->time - s->last_non_b_time;
3350         s->last_non_b_time= s->time;
3351         assert(s->picture_number==0 || s->pp_time > 0);
3352     }
3353 }
3354
3355 static int encode_picture(MpegEncContext *s, int picture_number)
3356 {
3357     int i, ret;
3358     int bits;
3359     int context_count = s->slice_context_count;
3360
3361     s->picture_number = picture_number;
3362
3363     /* Reset the average MB variance */
3364     s->me.mb_var_sum_temp    =
3365     s->me.mc_mb_var_sum_temp = 0;
3366
3367     /* we need to initialize some time vars before we can encode b-frames */
3368     // RAL: Condition added for MPEG1VIDEO
3369     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3370         set_frame_distances(s);
3371     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3372         ff_set_mpeg4_time(s);
3373
3374     s->me.scene_change_score=0;
3375
3376 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3377
3378     if(s->pict_type==AV_PICTURE_TYPE_I){
3379         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3380         else                        s->no_rounding=0;
3381     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3382         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3383             s->no_rounding ^= 1;
3384     }
3385
3386     if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3387         if (estimate_qp(s,1) < 0)
3388             return -1;
3389         ff_get_2pass_fcode(s);
3390     } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3391         if(s->pict_type==AV_PICTURE_TYPE_B)
3392             s->lambda= s->last_lambda_for[s->pict_type];
3393         else
3394             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3395         update_qscale(s);
3396     }
3397
3398     s->mb_intra=0; //for the rate distortion & bit compare functions
3399     for(i=1; i<context_count; i++){
3400         ret = ff_update_duplicate_context(s->thread_context[i], s);
3401         if (ret < 0)
3402             return ret;
3403     }
3404
3405     if(ff_init_me(s)<0)
3406         return -1;
3407
3408     /* Estimate motion for every MB */
3409     if(s->pict_type != AV_PICTURE_TYPE_I){
3410         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3411         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3412         if (s->pict_type != AV_PICTURE_TYPE_B) {
3413             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3414                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3415             }
3416         }
3417
3418         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3419     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3420         /* I-Frame */
3421         for(i=0; i<s->mb_stride*s->mb_height; i++)
3422             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3423
3424         if(!s->fixed_qscale){
3425             /* finding spatial complexity for I-frame rate control */
3426             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3427         }
3428     }
3429     for(i=1; i<context_count; i++){
3430         merge_context_after_me(s, s->thread_context[i]);
3431     }
3432     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3433     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3434     emms_c();
3435
3436     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3437         s->pict_type= AV_PICTURE_TYPE_I;
3438         for(i=0; i<s->mb_stride*s->mb_height; i++)
3439             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3440         ff_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3441                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3442     }
3443
3444     if(!s->umvplus){
3445         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3446             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3447
3448             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3449                 int a,b;
3450                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3451                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3452                 s->f_code= FFMAX3(s->f_code, a, b);
3453             }
3454
3455             ff_fix_long_p_mvs(s);
3456             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3457             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3458                 int j;
3459                 for(i=0; i<2; i++){
3460                     for(j=0; j<2; j++)
3461                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3462                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3463                 }
3464             }
3465         }
3466
3467         if(s->pict_type==AV_PICTURE_TYPE_B){
3468             int a, b;
3469
3470             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3471             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3472             s->f_code = FFMAX(a, b);
3473
3474             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3475             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3476             s->b_code = FFMAX(a, b);
3477
3478             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3479             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3480             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3481             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3482             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3483                 int dir, j;
3484                 for(dir=0; dir<2; dir++){
3485                     for(i=0; i<2; i++){
3486                         for(j=0; j<2; j++){
3487                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3488                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3489                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3490                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3491                         }
3492                     }
3493                 }
3494             }
3495         }
3496     }
3497
3498     if (estimate_qp(s, 0) < 0)
3499         return -1;
3500
3501     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3502         s->pict_type == AV_PICTURE_TYPE_I &&
3503         !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3504         s->qscale= 3; //reduce clipping problems
3505
3506     if (s->out_format == FMT_MJPEG) {
3507         /* for mjpeg, we do include qscale in the matrix */
3508         for(i=1;i<64;i++){
3509             int j = s->idsp.idct_permutation[i];
3510
3511             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3512         }
3513         s->y_dc_scale_table=
3514         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3515         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3516         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3517                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3518         s->qscale= 8;
3519     }
3520
3521     //FIXME var duplication
3522     s->current_picture_ptr->f->key_frame =
3523     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3524     s->current_picture_ptr->f->pict_type =
3525     s->current_picture.f->pict_type = s->pict_type;
3526
3527     if (s->current_picture.f->key_frame)
3528         s->picture_in_gop_number=0;
3529
3530     s->last_bits= put_bits_count(&s->pb);
3531     switch(s->out_format) {
3532     case FMT_MJPEG:
3533         if (CONFIG_MJPEG_ENCODER)
3534             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3535                                            s->intra_matrix);
3536         break;
3537     case FMT_H261:
3538         if (CONFIG_H261_ENCODER)
3539             ff_h261_encode_picture_header(s, picture_number);
3540         break;
3541     case FMT_H263:
3542         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3543             ff_wmv2_encode_picture_header(s, picture_number);
3544         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3545             ff_msmpeg4_encode_picture_header(s, picture_number);
3546         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3547             ff_mpeg4_encode_picture_header(s, picture_number);
3548         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3549             ret = ff_rv10_encode_picture_header(s, picture_number);
3550             if (ret < 0)
3551                 return ret;
3552         }
3553         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3554             ff_rv20_encode_picture_header(s, picture_number);
3555         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3556             ff_flv_encode_picture_header(s, picture_number);
3557         else if (CONFIG_H263_ENCODER)
3558             ff_h263_encode_picture_header(s, picture_number);
3559         break;
3560     case FMT_MPEG1:
3561         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3562             ff_mpeg1_encode_picture_header(s, picture_number);
3563         break;
3564     default:
3565         assert(0);
3566     }
3567     bits= put_bits_count(&s->pb);
3568     s->header_bits= bits - s->last_bits;
3569
3570     for(i=1; i<context_count; i++){
3571         update_duplicate_context_after_me(s->thread_context[i], s);
3572     }
3573     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3574     for(i=1; i<context_count; i++){
3575         merge_context_after_encode(s, s->thread_context[i]);
3576     }
3577     emms_c();
3578     return 0;
3579 }
3580
3581 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3582     const int intra= s->mb_intra;
3583     int i;
3584
3585     s->dct_count[intra]++;
3586
3587     for(i=0; i<64; i++){
3588         int level= block[i];
3589
3590         if(level){
3591             if(level>0){
3592                 s->dct_error_sum[intra][i] += level;
3593                 level -= s->dct_offset[intra][i];
3594                 if(level<0) level=0;
3595             }else{
3596                 s->dct_error_sum[intra][i] -= level;
3597                 level += s->dct_offset[intra][i];
3598                 if(level>0) level=0;
3599             }
3600             block[i]= level;
3601         }
3602     }
3603 }
3604
3605 static int dct_quantize_trellis_c(MpegEncContext *s,
3606                                   int16_t *block, int n,
3607                                   int qscale, int *overflow){
3608     const int *qmat;
3609     const uint8_t *scantable= s->intra_scantable.scantable;
3610     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3611     int max=0;
3612     unsigned int threshold1, threshold2;
3613     int bias=0;
3614     int run_tab[65];
3615     int level_tab[65];
3616     int score_tab[65];
3617     int survivor[65];
3618     int survivor_count;
3619     int last_run=0;
3620     int last_level=0;
3621     int last_score= 0;
3622     int last_i;
3623     int coeff[2][64];
3624     int coeff_count[64];
3625     int qmul, qadd, start_i, last_non_zero, i, dc;
3626     const int esc_length= s->ac_esc_length;
3627     uint8_t * length;
3628     uint8_t * last_length;
3629     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3630
3631     s->fdsp.fdct(block);
3632
3633     if(s->dct_error_sum)
3634         s->denoise_dct(s, block);
3635     qmul= qscale*16;
3636     qadd= ((qscale-1)|1)*8;
3637
3638     if (s->mb_intra) {
3639         int q;
3640         if (!s->h263_aic) {
3641             if (n < 4)
3642                 q = s->y_dc_scale;
3643             else
3644                 q = s->c_dc_scale;
3645             q = q << 3;
3646         } else{
3647             /* For AIC we skip quant/dequant of INTRADC */
3648             q = 1 << 3;
3649             qadd=0;
3650         }
3651
3652         /* note: block[0] is assumed to be positive */
3653         block[0] = (block[0] + (q >> 1)) / q;
3654         start_i = 1;
3655         last_non_zero = 0;
3656         qmat = s->q_intra_matrix[qscale];
3657         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3658             bias= 1<<(QMAT_SHIFT-1);
3659         length     = s->intra_ac_vlc_length;
3660         last_length= s->intra_ac_vlc_last_length;
3661     } else {
3662         start_i = 0;
3663         last_non_zero = -1;
3664         qmat = s->q_inter_matrix[qscale];
3665         length     = s->inter_ac_vlc_length;
3666         last_length= s->inter_ac_vlc_last_length;
3667     }
3668     last_i= start_i;
3669
3670     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3671     threshold2= (threshold1<<1);
3672
3673     for(i=63; i>=start_i; i--) {
3674         const int j = scantable[i];
3675         int level = block[j] * qmat[j];
3676
3677         if(((unsigned)(level+threshold1))>threshold2){
3678             last_non_zero = i;
3679             break;
3680         }
3681     }
3682
3683     for(i=start_i; i<=last_non_zero; i++) {
3684         const int j = scantable[i];
3685         int level = block[j] * qmat[j];
3686
3687 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3688 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3689         if(((unsigned)(level+threshold1))>threshold2){
3690             if(level>0){
3691                 level= (bias + level)>>QMAT_SHIFT;
3692                 coeff[0][i]= level;
3693                 coeff[1][i]= level-1;
3694 //                coeff[2][k]= level-2;
3695             }else{
3696                 level= (bias - level)>>QMAT_SHIFT;
3697                 coeff[0][i]= -level;
3698                 coeff[1][i]= -level+1;
3699 //                coeff[2][k]= -level+2;
3700             }
3701             coeff_count[i]= FFMIN(level, 2);
3702             assert(coeff_count[i]);
3703             max |=level;
3704         }else{
3705             coeff[0][i]= (level>>31)|1;
3706             coeff_count[i]= 1;
3707         }
3708     }
3709
3710     *overflow= s->max_qcoeff < max; //overflow might have happened
3711
3712     if(last_non_zero < start_i){
3713         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3714         return last_non_zero;
3715     }
3716
3717     score_tab[start_i]= 0;
3718     survivor[0]= start_i;
3719     survivor_count= 1;
3720
3721     for(i=start_i; i<=last_non_zero; i++){
3722         int level_index, j, zero_distortion;
3723         int dct_coeff= FFABS(block[ scantable[i] ]);
3724         int best_score=256*256*256*120;
3725
3726         if (s->fdsp.fdct == ff_fdct_ifast)
3727             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3728         zero_distortion= dct_coeff*dct_coeff;
3729
3730         for(level_index=0; level_index < coeff_count[i]; level_index++){
3731             int distortion;
3732             int level= coeff[level_index][i];
3733             const int alevel= FFABS(level);
3734             int unquant_coeff;
3735
3736             assert(level);
3737
3738             if(s->out_format == FMT_H263){
3739                 unquant_coeff= alevel*qmul + qadd;
3740             }else{ //MPEG1
3741                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3742                 if(s->mb_intra){
3743                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3744                         unquant_coeff =   (unquant_coeff - 1) | 1;
3745                 }else{
3746                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3747                         unquant_coeff =   (unquant_coeff - 1) | 1;
3748                 }
3749                 unquant_coeff<<= 3;
3750             }
3751
3752             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3753             level+=64;
3754             if((level&(~127)) == 0){
3755                 for(j=survivor_count-1; j>=0; j--){
3756                     int run= i - survivor[j];
3757                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3758                     score += score_tab[i-run];
3759
3760                     if(score < best_score){
3761                         best_score= score;
3762                         run_tab[i+1]= run;
3763                         level_tab[i+1]= level-64;
3764                     }
3765                 }
3766
3767                 if(s->out_format == FMT_H263){
3768                     for(j=survivor_count-1; j>=0; j--){
3769                         int run= i - survivor[j];
3770                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3771                         score += score_tab[i-run];
3772                         if(score < last_score){
3773                             last_score= score;
3774                             last_run= run;
3775                             last_level= level-64;
3776                             last_i= i+1;
3777                         }
3778                     }
3779                 }
3780             }else{
3781                 distortion += esc_length*lambda;
3782                 for(j=survivor_count-1; j>=0; j--){
3783                     int run= i - survivor[j];
3784                     int score= distortion + score_tab[i-run];
3785
3786                     if(score < best_score){
3787                         best_score= score;
3788                         run_tab[i+1]= run;
3789                         level_tab[i+1]= level-64;
3790                     }
3791                 }
3792
3793                 if(s->out_format == FMT_H263){
3794                   for(j=survivor_count-1; j>=0; j--){
3795                         int run= i - survivor[j];
3796                         int score= distortion + score_tab[i-run];
3797                         if(score < last_score){
3798                             last_score= score;
3799                             last_run= run;
3800                             last_level= level-64;
3801                             last_i= i+1;
3802                         }
3803                     }
3804                 }
3805             }
3806         }
3807
3808         score_tab[i+1]= best_score;
3809
3810         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3811         if(last_non_zero <= 27){
3812             for(; survivor_count; survivor_count--){
3813                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3814                     break;
3815             }
3816         }else{
3817             for(; survivor_count; survivor_count--){
3818                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3819                     break;
3820             }
3821         }
3822
3823         survivor[ survivor_count++ ]= i+1;
3824     }
3825
3826     if(s->out_format != FMT_H263){
3827         last_score= 256*256*256*120;
3828         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3829             int score= score_tab[i];
3830             if(i) score += lambda*2; //FIXME exacter?
3831
3832             if(score < last_score){
3833                 last_score= score;
3834                 last_i= i;
3835                 last_level= level_tab[i];
3836                 last_run= run_tab[i];
3837             }
3838         }
3839     }
3840
3841     s->coded_score[n] = last_score;
3842
3843     dc= FFABS(block[0]);
3844     last_non_zero= last_i - 1;
3845     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3846
3847     if(last_non_zero < start_i)
3848         return last_non_zero;
3849
3850     if(last_non_zero == 0 && start_i == 0){
3851         int best_level= 0;
3852         int best_score= dc * dc;
3853
3854         for(i=0; i<coeff_count[0]; i++){
3855             int level= coeff[i][0];
3856             int alevel= FFABS(level);
3857             int unquant_coeff, score, distortion;
3858
3859             if(s->out_format == FMT_H263){
3860                     unquant_coeff= (alevel*qmul + qadd)>>3;
3861             }else{ //MPEG1
3862                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3863                     unquant_coeff =   (unquant_coeff - 1) | 1;
3864             }
3865             unquant_coeff = (unquant_coeff + 4) >> 3;
3866             unquant_coeff<<= 3 + 3;
3867
3868             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3869             level+=64;
3870             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3871             else                    score= distortion + esc_length*lambda;
3872
3873             if(score < best_score){
3874                 best_score= score;
3875                 best_level= level - 64;
3876             }
3877         }
3878         block[0]= best_level;
3879         s->coded_score[n] = best_score - dc*dc;
3880         if(best_level == 0) return -1;
3881         else                return last_non_zero;
3882     }
3883
3884     i= last_i;
3885     assert(last_level);
3886
3887     block[ perm_scantable[last_non_zero] ]= last_level;
3888     i -= last_run + 1;
3889
3890     for(; i>start_i; i -= run_tab[i] + 1){
3891         block[ perm_scantable[i-1] ]= level_tab[i];
3892     }
3893
3894     return last_non_zero;
3895 }
3896
3897 //#define REFINE_STATS 1
3898 static int16_t basis[64][64];
3899
3900 static void build_basis(uint8_t *perm){
3901     int i, j, x, y;
3902     emms_c();
3903     for(i=0; i<8; i++){
3904         for(j=0; j<8; j++){
3905             for(y=0; y<8; y++){
3906                 for(x=0; x<8; x++){
3907                     double s= 0.25*(1<<BASIS_SHIFT);
3908                     int index= 8*i + j;
3909                     int perm_index= perm[index];
3910                     if(i==0) s*= sqrt(0.5);
3911                     if(j==0) s*= sqrt(0.5);
3912                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3913                 }
3914             }
3915         }
3916     }
3917 }
3918
3919 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3920                         int16_t *block, int16_t *weight, int16_t *orig,
3921                         int n, int qscale){
3922     int16_t rem[64];
3923     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3924     const uint8_t *scantable= s->intra_scantable.scantable;
3925     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3926 //    unsigned int threshold1, threshold2;
3927 //    int bias=0;
3928     int run_tab[65];
3929     int prev_run=0;
3930     int prev_level=0;
3931     int qmul, qadd, start_i, last_non_zero, i, dc;
3932     uint8_t * length;
3933     uint8_t * last_length;
3934     int lambda;
3935     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3936 #ifdef REFINE_STATS
3937 static int count=0;
3938 static int after_last=0;
3939 static int to_zero=0;
3940 static int from_zero=0;
3941 static int raise=0;
3942 static int lower=0;
3943 static int messed_sign=0;
3944 #endif
3945
3946     if(basis[0][0] == 0)
3947         build_basis(s->idsp.idct_permutation);
3948
3949     qmul= qscale*2;
3950     qadd= (qscale-1)|1;
3951     if (s->mb_intra) {
3952         if (!s->h263_aic) {
3953             if (n < 4)
3954                 q = s->y_dc_scale;
3955             else
3956                 q = s->c_dc_scale;
3957         } else{
3958             /* For AIC we skip quant/dequant of INTRADC */
3959             q = 1;
3960             qadd=0;
3961         }
3962         q <<= RECON_SHIFT-3;
3963         /* note: block[0] is assumed to be positive */
3964         dc= block[0]*q;
3965 //        block[0] = (block[0] + (q >> 1)) / q;
3966         start_i = 1;
3967 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3968 //            bias= 1<<(QMAT_SHIFT-1);
3969         length     = s->intra_ac_vlc_length;
3970         last_length= s->intra_ac_vlc_last_length;
3971     } else {
3972         dc= 0;
3973         start_i = 0;
3974         length     = s->inter_ac_vlc_length;
3975         last_length= s->inter_ac_vlc_last_length;
3976     }
3977     last_non_zero = s->block_last_index[n];
3978
3979 #ifdef REFINE_STATS
3980 {START_TIMER
3981 #endif
3982     dc += (1<<(RECON_SHIFT-1));
3983     for(i=0; i<64; i++){
3984         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3985     }
3986 #ifdef REFINE_STATS
3987 STOP_TIMER("memset rem[]")}
3988 #endif
3989     sum=0;
3990     for(i=0; i<64; i++){
3991         int one= 36;
3992         int qns=4;
3993         int w;
3994
3995         w= FFABS(weight[i]) + qns*one;
3996         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3997
3998         weight[i] = w;
3999 //        w=weight[i] = (63*qns + (w/2)) / w;
4000
4001         assert(w>0);
4002         assert(w<(1<<6));
4003         sum += w*w;
4004     }
4005     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4006 #ifdef REFINE_STATS
4007 {START_TIMER
4008 #endif
4009     run=0;
4010     rle_index=0;
4011     for(i=start_i; i<=last_non_zero; i++){
4012         int j= perm_scantable[i];
4013         const int level= block[j];
4014         int coeff;
4015
4016         if(level){
4017             if(level<0) coeff= qmul*level - qadd;
4018             else        coeff= qmul*level + qadd;
4019             run_tab[rle_index++]=run;
4020             run=0;
4021
4022             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4023         }else{
4024             run++;
4025         }
4026     }
4027 #ifdef REFINE_STATS
4028 if(last_non_zero>0){
4029 STOP_TIMER("init rem[]")
4030 }
4031 }
4032
4033 {START_TIMER
4034 #endif
4035     for(;;){
4036         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4037         int best_coeff=0;
4038         int best_change=0;
4039         int run2, best_unquant_change=0, analyze_gradient;
4040 #ifdef REFINE_STATS
4041 {START_TIMER
4042 #endif
4043         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4044
4045         if(analyze_gradient){
4046 #ifdef REFINE_STATS
4047 {START_TIMER
4048 #endif
4049             for(i=0; i<64; i++){
4050                 int w= weight[i];
4051
4052                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4053             }
4054 #ifdef REFINE_STATS
4055 STOP_TIMER("rem*w*w")}
4056 {START_TIMER
4057 #endif
4058             s->fdsp.fdct(d1);
4059 #ifdef REFINE_STATS
4060 STOP_TIMER("dct")}
4061 #endif
4062         }
4063
4064         if(start_i){
4065             const int level= block[0];
4066             int change, old_coeff;
4067
4068             assert(s->mb_intra);
4069
4070             old_coeff= q*level;
4071
4072             for(change=-1; change<=1; change+=2){
4073                 int new_level= level + change;
4074                 int score, new_coeff;
4075
4076                 new_coeff= q*new_level;
4077                 if(new_coeff >= 2048 || new_coeff < 0)
4078                     continue;
4079
4080                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4081                                                   new_coeff - old_coeff);
4082                 if(score<best_score){
4083                     best_score= score;
4084                     best_coeff= 0;
4085                     best_change= change;
4086                     best_unquant_change= new_coeff - old_coeff;
4087                 }
4088             }
4089         }
4090
4091         run=0;
4092         rle_index=0;
4093         run2= run_tab[rle_index++];
4094         prev_level=0;
4095         prev_run=0;
4096
4097         for(i=start_i; i<64; i++){
4098             int j= perm_scantable[i];
4099             const int level= block[j];
4100             int change, old_coeff;
4101
4102             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4103                 break;
4104
4105             if(level){
4106                 if(level<0) old_coeff= qmul*level - qadd;
4107                 else        old_coeff= qmul*level + qadd;
4108                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4109             }else{
4110                 old_coeff=0;
4111                 run2--;
4112                 assert(run2>=0 || i >= last_non_zero );
4113             }
4114
4115             for(change=-1; change<=1; change+=2){
4116                 int new_level= level + change;
4117                 int score, new_coeff, unquant_change;
4118
4119                 score=0;
4120                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4121                    continue;
4122
4123                 if(new_level){
4124                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4125                     else            new_coeff= qmul*new_level + qadd;
4126                     if(new_coeff >= 2048 || new_coeff <= -2048)
4127                         continue;
4128                     //FIXME check for overflow
4129
4130                     if(level){
4131                         if(level < 63 && level > -63){
4132                             if(i < last_non_zero)
4133                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4134                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4135                             else
4136                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4137                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4138                         }
4139                     }else{
4140                         assert(FFABS(new_level)==1);
4141
4142                         if(analyze_gradient){
4143                             int g= d1[ scantable[i] ];
4144                             if(g && (g^new_level) >= 0)
4145                                 continue;
4146                         }
4147
4148                         if(i < last_non_zero){
4149                             int next_i= i + run2 + 1;
4150                             int next_level= block[ perm_scantable[next_i] ] + 64;
4151
4152                             if(next_level&(~127))
4153                                 next_level= 0;
4154
4155                             if(next_i < last_non_zero)
4156                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4157                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4158                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4159                             else
4160                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4161                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4162                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4163                         }else{
4164                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4165                             if(prev_level){
4166                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4167                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4168                             }
4169                         }
4170                     }
4171                 }else{
4172                     new_coeff=0;
4173                     assert(FFABS(level)==1);
4174
4175                     if(i < last_non_zero){
4176                         int next_i= i + run2 + 1;
4177                         int next_level= block[ perm_scantable[next_i] ] + 64;
4178
4179                         if(next_level&(~127))
4180                             next_level= 0;
4181
4182                         if(next_i < last_non_zero)
4183                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4184                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4185                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4186                         else
4187                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4188                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4189                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4190                     }else{
4191                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4192                         if(prev_level){
4193                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4194                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4195                         }
4196                     }
4197                 }
4198
4199                 score *= lambda;
4200
4201                 unquant_change= new_coeff - old_coeff;
4202                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4203
4204                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4205                                                    unquant_change);
4206                 if(score<best_score){
4207                     best_score= score;
4208                     best_coeff= i;
4209                     best_change= change;
4210                     best_unquant_change= unquant_change;
4211                 }
4212             }
4213             if(level){
4214                 prev_level= level + 64;
4215                 if(prev_level&(~127))
4216                     prev_level= 0;
4217                 prev_run= run;
4218                 run=0;
4219             }else{
4220                 run++;
4221             }
4222         }
4223 #ifdef REFINE_STATS
4224 STOP_TIMER("iterative step")}
4225 #endif
4226
4227         if(best_change){
4228             int j= perm_scantable[ best_coeff ];
4229
4230             block[j] += best_change;
4231
4232             if(best_coeff > last_non_zero){
4233                 last_non_zero= best_coeff;
4234                 assert(block[j]);
4235 #ifdef REFINE_STATS
4236 after_last++;
4237 #endif
4238             }else{
4239 #ifdef REFINE_STATS
4240 if(block[j]){
4241     if(block[j] - best_change){
4242         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4243             raise++;
4244         }else{
4245             lower++;
4246         }
4247     }else{
4248         from_zero++;
4249     }
4250 }else{
4251     to_zero++;
4252 }
4253 #endif
4254                 for(; last_non_zero>=start_i; last_non_zero--){
4255                     if(block[perm_scantable[last_non_zero]])
4256                         break;
4257                 }
4258             }
4259 #ifdef REFINE_STATS
4260 count++;
4261 if(256*256*256*64 % count == 0){
4262     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4263 }
4264 #endif
4265             run=0;
4266             rle_index=0;
4267             for(i=start_i; i<=last_non_zero; i++){
4268                 int j= perm_scantable[i];
4269                 const int level= block[j];
4270
4271                  if(level){
4272                      run_tab[rle_index++]=run;
4273                      run=0;
4274                  }else{
4275                      run++;
4276                  }
4277             }
4278
4279             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4280         }else{
4281             break;
4282         }
4283     }
4284 #ifdef REFINE_STATS
4285 if(last_non_zero>0){
4286 STOP_TIMER("iterative search")
4287 }
4288 }
4289 #endif
4290
4291     return last_non_zero;
4292 }
4293
/**
 * Permute an 8x8 block according to a permutation vector.
 *
 * Only the coefficients at scantable[0] .. scantable[last] are moved; every
 * other entry of block is left untouched.
 *
 * @param block       the block which will be permuted according to
 *                    the given permutation vector
 * @param permutation the permutation vector (only read)
 * @param scantable   the used scantable, this is only used to speed the
 *                    permutation up, the block is not (inverse) permutated
 *                    to scantable order!
 * @param last        the last non zero coefficient in scantable order, used
 *                    to speed the permutation up; nothing is done when it
 *                    is <= 0
 */
static void block_permute(int16_t *block, const uint8_t *permutation,
                          const uint8_t *scantable, int last)
{
    int i;
    int16_t temp[64];

    /* FIXME it is ok but not clean and might fail for some permutations:
     * with last == 0 the coefficient at scantable[0] is not moved at all. */
    if (last <= 0)
        return;

    /* Pass 1: save the coefficients in use and clear their old slots. */
    for (i = 0; i <= last; i++) {
        const int j = scantable[i];
        temp[j] = block[j];
        block[j] = 0;
    }

    /* Pass 2: store each saved coefficient at its permuted position. */
    for (i = 0; i <= last; i++) {
        const int j = scantable[i];
        const int perm_j = permutation[j];
        block[perm_j] = temp[j];
    }
}
4329
4330 int ff_dct_quantize_c(MpegEncContext *s,
4331                         int16_t *block, int n,
4332                         int qscale, int *overflow)
4333 {
4334     int i, j, level, last_non_zero, q, start_i;
4335     const int *qmat;
4336     const uint8_t *scantable= s->intra_scantable.scantable;
4337     int bias;
4338     int max=0;
4339     unsigned int threshold1, threshold2;
4340
4341     s->fdsp.fdct(block);
4342
4343     if(s->dct_error_sum)
4344         s->denoise_dct(s, block);
4345
4346     if (s->mb_intra) {
4347         if (!s->h263_aic) {
4348             if (n < 4)
4349                 q = s->y_dc_scale;
4350             else
4351                 q = s->c_dc_scale;
4352             q = q << 3;
4353         } else
4354             /* For AIC we skip quant/dequant of INTRADC */
4355             q = 1 << 3;
4356
4357         /* note: block[0] is assumed to be positive */
4358         block[0] = (block[0] + (q >> 1)) / q;
4359         start_i = 1;
4360         last_non_zero = 0;
4361         qmat = s->q_intra_matrix[qscale];
4362         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4363     } else {
4364         start_i = 0;
4365         last_non_zero = -1;
4366         qmat = s->q_inter_matrix[qscale];
4367         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4368     }
4369     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4370     threshold2= (threshold1<<1);
4371     for(i=63;i>=start_i;i--) {
4372         j = scantable[i];
4373         level = block[j] * qmat[j];
4374
4375         if(((unsigned)(level+threshold1))>threshold2){
4376             last_non_zero = i;
4377             break;
4378         }else{
4379             block[j]=0;
4380         }
4381     }
4382     for(i=start_i; i<=last_non_zero; i++) {
4383         j = scantable[i];
4384         level = block[j] * qmat[j];
4385
4386 //        if(   bias+level >= (1<<QMAT_SHIFT)
4387 //           || bias-level >= (1<<QMAT_SHIFT)){
4388         if(((unsigned)(level+threshold1))>threshold2){
4389             if(level>0){
4390                 level= (bias + level)>>QMAT_SHIFT;
4391                 block[j]= level;
4392             }else{
4393                 level= (bias - level)>>QMAT_SHIFT;
4394                 block[j]= -level;
4395             }
4396             max |=level;
4397         }else{
4398             block[j]=0;
4399         }
4400     }
4401     *overflow= s->max_qcoeff < max; //overflow might have happened
4402
4403     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4404     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4405         block_permute(block, s->idsp.idct_permutation,
4406                       scantable, last_non_zero);
4407
4408     return last_non_zero;
4409 }
4410
/* Byte offset of member x inside MpegEncContext, used by the option tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flags common to the options below. Parenthesized so the expansion stays a
 * single operand wherever the macro is used. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4413 static const AVOption h263_options[] = {
4414     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4415     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4416     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4417     FF_MPV_COMMON_OPTS
4418     { NULL },
4419 };
4420
4421 static const AVClass h263_class = {
4422     .class_name = "H.263 encoder",
4423     .item_name  = av_default_item_name,
4424     .option     = h263_options,
4425     .version    = LIBAVUTIL_VERSION_INT,
4426 };
4427
4428 AVCodec ff_h263_encoder = {
4429     .name           = "h263",
4430     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4431     .type           = AVMEDIA_TYPE_VIDEO,
4432     .id             = AV_CODEC_ID_H263,
4433     .priv_data_size = sizeof(MpegEncContext),
4434     .init           = ff_mpv_encode_init,
4435     .encode2        = ff_mpv_encode_picture,
4436     .close          = ff_mpv_encode_end,
4437     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4438     .priv_class     = &h263_class,
4439 };
4440
4441 static const AVOption h263p_options[] = {
4442     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4443     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4444     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4445     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4446     FF_MPV_COMMON_OPTS
4447     { NULL },
4448 };
4449 static const AVClass h263p_class = {
4450     .class_name = "H.263p encoder",
4451     .item_name  = av_default_item_name,
4452     .option     = h263p_options,
4453     .version    = LIBAVUTIL_VERSION_INT,
4454 };
4455
4456 AVCodec ff_h263p_encoder = {
4457     .name           = "h263p",
4458     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4459     .type           = AVMEDIA_TYPE_VIDEO,
4460     .id             = AV_CODEC_ID_H263P,
4461     .priv_data_size = sizeof(MpegEncContext),
4462     .init           = ff_mpv_encode_init,
4463     .encode2        = ff_mpv_encode_picture,
4464     .close          = ff_mpv_encode_end,
4465     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
4466     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4467     .priv_class     = &h263p_class,
4468 };
4469
4470 static const AVClass msmpeg4v2_class = {
4471     .class_name = "msmpeg4v2 encoder",
4472     .item_name  = av_default_item_name,
4473     .option     = ff_mpv_generic_options,
4474     .version    = LIBAVUTIL_VERSION_INT,
4475 };
4476
4477 AVCodec ff_msmpeg4v2_encoder = {
4478     .name           = "msmpeg4v2",
4479     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4480     .type           = AVMEDIA_TYPE_VIDEO,
4481     .id             = AV_CODEC_ID_MSMPEG4V2,
4482     .priv_data_size = sizeof(MpegEncContext),
4483     .init           = ff_mpv_encode_init,
4484     .encode2        = ff_mpv_encode_picture,
4485     .close          = ff_mpv_encode_end,
4486     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4487     .priv_class     = &msmpeg4v2_class,
4488 };
4489
4490 static const AVClass msmpeg4v3_class = {
4491     .class_name = "msmpeg4v3 encoder",
4492     .item_name  = av_default_item_name,
4493     .option     = ff_mpv_generic_options,
4494     .version    = LIBAVUTIL_VERSION_INT,
4495 };
4496
4497 AVCodec ff_msmpeg4v3_encoder = {
4498     .name           = "msmpeg4",
4499     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4500     .type           = AVMEDIA_TYPE_VIDEO,
4501     .id             = AV_CODEC_ID_MSMPEG4V3,
4502     .priv_data_size = sizeof(MpegEncContext),
4503     .init           = ff_mpv_encode_init,
4504     .encode2        = ff_mpv_encode_picture,
4505     .close          = ff_mpv_encode_end,
4506     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4507     .priv_class     = &msmpeg4v3_class,
4508 };
4509
4510 static const AVClass wmv1_class = {
4511     .class_name = "wmv1 encoder",
4512     .item_name  = av_default_item_name,
4513     .option     = ff_mpv_generic_options,
4514     .version    = LIBAVUTIL_VERSION_INT,
4515 };
4516
4517 AVCodec ff_wmv1_encoder = {
4518     .name           = "wmv1",
4519     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4520     .type           = AVMEDIA_TYPE_VIDEO,
4521     .id             = AV_CODEC_ID_WMV1,
4522     .priv_data_size = sizeof(MpegEncContext),
4523     .init           = ff_mpv_encode_init,
4524     .encode2        = ff_mpv_encode_picture,
4525     .close          = ff_mpv_encode_end,
4526     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4527     .priv_class     = &wmv1_class,
4528 };