git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "idctdsp.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "h261.h"
  44 #include "h263.h"
  45 #include "mjpegenc_common.h"
  46 #include "mathops.h"
  47 #include "mpegutils.h"
  48 #include "mjpegenc.h"
  49 #include "msmpeg4.h"
  50 #include "pixblockdsp.h"
  51 #include "qpeldsp.h"
  52 #include "faandct.h"
  53 #include "thread.h"
  54 #include "aandcttab.h"
  55 #include "flv.h"
  56 #include "mpeg4video.h"
  57 #include "internal.h"
  58 #include "bytestream.h"
  59 #include <limits.h>
  60 #include "sp5x.h"
  61
  62 #define QUANT_BIAS_SHIFT 8
  63
  64 #define QMAT_SHIFT_MMX 16
  65 #define QMAT_SHIFT 21
  66
  67 static int encode_picture(MpegEncContext *s, int picture_number);
  68 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
  69 static int sse_mb(MpegEncContext *s);
  70 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
  71 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
  72
  73 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
  74 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
  75
  76 const AVOption ff_mpv_generic_options[] = {
  77     FF_MPV_COMMON_OPTS
  78     { NULL },
  79 };
  80
  81 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
  82                        uint16_t (*qmat16)[2][64],
  83                        const uint16_t *quant_matrix,
  84                        int bias, int qmin, int qmax, int intra)
  85 {
  86     FDCTDSPContext *fdsp = &s->fdsp;
  87     int qscale;
  88     int shift = 0;
  89
  90     for (qscale = qmin; qscale <= qmax; qscale++) {
  91         int i;
  92         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
  93 #if CONFIG_FAANDCT
  94             fdsp->fdct == ff_faandct            ||
  95 #endif /* CONFIG_FAANDCT */
  96             fdsp->fdct == ff_jpeg_fdct_islow_10) {
  97             for (i = 0; i < 64; i++) {
  98                 const int j = s->idsp.idct_permutation[i];
  99                 /* 16 <= qscale * quant_matrix[i] <= 7905
 100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 101                  *             19952 <=              x  <= 249205026
 102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 103                  *           3444240 >= (1 << 36) / (x) >= 275 */
 104
 105                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
 106                                         (qscale * quant_matrix[j]));
 107             }
 108         } else if (fdsp->fdct == ff_fdct_ifast) {
 109             for (i = 0; i < 64; i++) {
 110                 const int j = s->idsp.idct_permutation[i];
 111                 /* 16 <= qscale * quant_matrix[i] <= 7905
 112                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 113                  *             19952 <=              x  <= 249205026
 114                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 115                  *           3444240 >= (1 << 36) / (x) >= 275 */
 116
 117                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
 118                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
 119             }
 120         } else {
 121             for (i = 0; i < 64; i++) {
 122                 const int j = s->idsp.idct_permutation[i];
 123                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 124                  * Assume x = qscale * quant_matrix[i]
 125                  * So             16 <=              x  <= 7905
 126                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 127                  * so          32768 >= (1 << 19) / (x) >= 67 */
 128                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
 129                                         (qscale * quant_matrix[j]));
 130                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 131                 //                    (qscale * quant_matrix[i]);
 132                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
 133                                        (qscale * quant_matrix[j]);
 134
 135                 if (qmat16[qscale][0][i] == 0 ||
 136                     qmat16[qscale][0][i] == 128 * 256)
 137                     qmat16[qscale][0][i] = 128 * 256 - 1;
 138                 qmat16[qscale][1][i] =
 139                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 140                                 qmat16[qscale][0][i]);
 141             }
 142         }
 143
 144         for (i = intra; i < 64; i++) {
 145             int64_t max = 8191;
 146             if (fdsp->fdct == ff_fdct_ifast) {
 147                 max = (8191LL * ff_aanscales[i]) >> 14;
 148             }
 149             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 150                 shift++;
 151             }
 152         }
 153     }
 154     if (shift) {
 155         av_log(NULL, AV_LOG_INFO,
 156                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 157                QMAT_SHIFT - shift);
 158     }
 159 }
 160
 161 static inline void update_qscale(MpegEncContext *s)
 162 {
 163     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 164                 (FF_LAMBDA_SHIFT + 7);
 165     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 166
 167     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 168                  FF_LAMBDA_SHIFT;
 169 }
 170
 171 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 172 {
 173     int i;
 174
 175     if (matrix) {
 176         put_bits(pb, 1, 1);
 177         for (i = 0; i < 64; i++) {
 178             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 179         }
 180     } else
 181         put_bits(pb, 1, 0);
 182 }
 183
 184 /**
 185  * init s->current_picture.qscale_table from s->lambda_table
 186  */
 187 void ff_init_qscale_tab(MpegEncContext *s)
 188 {
 189     int8_t * const qscale_table = s->current_picture.qscale_table;
 190     int i;
 191
 192     for (i = 0; i < s->mb_num; i++) {
 193         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 194         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 195         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 196                                                   s->avctx->qmax);
 197     }
 198 }
 199
 200 static void update_duplicate_context_after_me(MpegEncContext *dst,
 201                                               MpegEncContext *src)
 202 {
 203 #define COPY(a) dst->a= src->a
 204     COPY(pict_type);
 205     COPY(current_picture);
 206     COPY(f_code);
 207     COPY(b_code);
 208     COPY(qscale);
 209     COPY(lambda);
 210     COPY(lambda2);
 211     COPY(picture_in_gop_number);
 212     COPY(gop_picture_number);
 213     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 214     COPY(progressive_frame);    // FIXME don't set in encode_header
 215     COPY(partitioned_frame);    // FIXME don't set in encode_header
 216 #undef COPY
 217 }
 218
 219 /**
 220  * Set the given MpegEncContext to defaults for encoding.
 221  * the changed fields will not depend upon the prior state of the MpegEncContext.
 222  */
 223 static void mpv_encode_defaults(MpegEncContext *s)
 224 {
 225     int i;
 226     ff_mpv_common_defaults(s);
 227
 228     for (i = -16; i < 16; i++) {
 229         default_fcode_tab[i + MAX_MV] = 1;
 230     }
 231     s->me.mv_penalty = default_mv_penalty;
 232     s->fcode_tab     = default_fcode_tab;
 233
 234     s->input_picture_number  = 0;
 235     s->picture_in_gop_number = 0;
 236 }
 237
 238 av_cold int ff_dct_encode_init(MpegEncContext *s) {
 239     if (ARCH_X86)
 240         ff_dct_encode_init_x86(s);
 241
 242     if (CONFIG_H263_ENCODER)
 243         ff_h263dsp_init(&s->h263dsp);
 244     if (!s->dct_quantize)
 245         s->dct_quantize = ff_dct_quantize_c;
 246     if (!s->denoise_dct)
 247         s->denoise_dct  = denoise_dct_c;
 248     s->fast_dct_quantize = s->dct_quantize;
 249     if (s->avctx->trellis)
 250         s->dct_quantize  = dct_quantize_trellis_c;
 251
 252     return 0;
 253 }
 254
 255 /* init video encoder */
 256 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
 257 {
 258     MpegEncContext *s = avctx->priv_data;
 259     int i, ret, format_supported;
 260
 261     mpv_encode_defaults(s);
 262
 263     switch (avctx->codec_id) {
 264     case AV_CODEC_ID_MPEG2VIDEO:
 265         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 266             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 267             av_log(avctx, AV_LOG_ERROR,
 268                    "only YUV420 and YUV422 are supported\n");
 269             return -1;
 270         }
 271         break;
 272     case AV_CODEC_ID_MJPEG:
 273     case AV_CODEC_ID_AMV:
 274         format_supported = 0;
 275         /* JPEG color space */
 276         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
 277             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
 278             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
 279             (avctx->color_range == AVCOL_RANGE_JPEG &&
 280              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 281               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 282               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
 283             format_supported = 1;
 284         /* MPEG color space */
 285         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
 286                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 287                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 288                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
 289             format_supported = 1;
 290
 291         if (!format_supported) {
 292             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 293             return -1;
 294         }
 295         break;
 296     default:
 297         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 298             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 299             return -1;
 300         }
 301     }
 302
 303     switch (avctx->pix_fmt) {
 304     case AV_PIX_FMT_YUVJ444P:
 305     case AV_PIX_FMT_YUV444P:
 306         s->chroma_format = CHROMA_444;
 307         break;
 308     case AV_PIX_FMT_YUVJ422P:
 309     case AV_PIX_FMT_YUV422P:
 310         s->chroma_format = CHROMA_422;
 311         break;
 312     case AV_PIX_FMT_YUVJ420P:
 313     case AV_PIX_FMT_YUV420P:
 314     default:
 315         s->chroma_format = CHROMA_420;
 316         break;
 317     }
 318
 319     s->bit_rate = avctx->bit_rate;
 320     s->width    = avctx->width;
 321     s->height   = avctx->height;
 322     if (avctx->gop_size > 600 &&
 323         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 324         av_log(avctx, AV_LOG_WARNING,
 325                "keyframe interval too large!, reducing it from %d to %d\n",
 326                avctx->gop_size, 600);
 327         avctx->gop_size = 600;
 328     }
 329     s->gop_size     = avctx->gop_size;
 330     s->avctx        = avctx;
 331     s->flags        = avctx->flags;
 332     s->flags2       = avctx->flags2;
 333     if (avctx->max_b_frames > MAX_B_FRAMES) {
 334         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 335                "is %d.\n", MAX_B_FRAMES);
 336         avctx->max_b_frames = MAX_B_FRAMES;
 337     }
 338     s->max_b_frames = avctx->max_b_frames;
 339     s->codec_id     = avctx->codec->id;
 340     s->strict_std_compliance = avctx->strict_std_compliance;
 341     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 342     s->mpeg_quant         = avctx->mpeg_quant;
 343     s->rtp_mode           = !!avctx->rtp_payload_size;
 344     s->intra_dc_precision = avctx->intra_dc_precision;
 345
 346     // workaround some differences between how applications specify dc precission
 347     if (s->intra_dc_precision < 0) {
 348         s->intra_dc_precision += 8;
 349     } else if (s->intra_dc_precision >= 8)
 350         s->intra_dc_precision -= 8;
 351
 352     if (s->intra_dc_precision < 0) {
 353         av_log(avctx, AV_LOG_ERROR,
 354                 "intra dc precision must be positive, note some applications use"
 355                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
 356         return AVERROR(EINVAL);
 357     }
 358
 359     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
 360         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
 361         return AVERROR(EINVAL);
 362     }
 363     s->user_specified_pts = AV_NOPTS_VALUE;
 364
 365     if (s->gop_size <= 1) {
 366         s->intra_only = 1;
 367         s->gop_size   = 12;
 368     } else {
 369         s->intra_only = 0;
 370     }
 371
 372     s->me_method = avctx->me_method;
 373
 374     /* Fixed QSCALE */
 375     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 376
 377     s->adaptive_quant = (s->avctx->lumi_masking ||
 378                          s->avctx->dark_masking ||
 379                          s->avctx->temporal_cplx_masking ||
 380                          s->avctx->spatial_cplx_masking  ||
 381                          s->avctx->p_masking      ||
 382                          s->avctx->border_masking ||
 383                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 384                         !s->fixed_qscale;
 385
 386     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
 387
 388     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 389         switch(avctx->codec_id) {
 390         case AV_CODEC_ID_MPEG1VIDEO:
 391         case AV_CODEC_ID_MPEG2VIDEO:
 392             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
 393             break;
 394         case AV_CODEC_ID_MPEG4:
 395         case AV_CODEC_ID_MSMPEG4V1:
 396         case AV_CODEC_ID_MSMPEG4V2:
 397         case AV_CODEC_ID_MSMPEG4V3:
 398             if       (avctx->rc_max_rate >= 15000000) {
 399                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
 400             } else if(avctx->rc_max_rate >=  2000000) {
 401                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
 402             } else if(avctx->rc_max_rate >=   384000) {
 403                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
 404             } else
 405                 avctx->rc_buffer_size = 40;
 406             avctx->rc_buffer_size *= 16384;
 407             break;
 408         }
 409         if (avctx->rc_buffer_size) {
 410             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
 411         }
 412     }
 413
 414     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
 415         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
 416         return -1;
 417     }
 418
 419     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 420         av_log(avctx, AV_LOG_INFO,
 421                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 422     }
 423
 424     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 425         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 426         return -1;
 427     }
 428
 429     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 430         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
 431         return -1;
 432     }
 433
 434     if (avctx->rc_max_rate &&
 435         avctx->rc_max_rate == avctx->bit_rate &&
 436         avctx->rc_max_rate != avctx->rc_min_rate) {
 437         av_log(avctx, AV_LOG_INFO,
 438                "impossible bitrate constraints, this will fail\n");
 439     }
 440
 441     if (avctx->rc_buffer_size &&
 442         avctx->bit_rate * (int64_t)avctx->time_base.num >
 443             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 444         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 445         return -1;
 446     }
 447
 448     if (!s->fixed_qscale &&
 449         avctx->bit_rate * av_q2d(avctx->time_base) >
 450             avctx->bit_rate_tolerance) {
 451         av_log(avctx, AV_LOG_WARNING,
 452                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
 453         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
 454     }
 455
 456     if (s->avctx->rc_max_rate &&
 457         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 458         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 459          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 460         90000LL * (avctx->rc_buffer_size - 1) >
 461             s->avctx->rc_max_rate * 0xFFFFLL) {
 462         av_log(avctx, AV_LOG_INFO,
 463                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 464                "specified vbv buffer is too large for the given bitrate!\n");
 465     }
 466
 467     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
 468         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 469         s->codec_id != AV_CODEC_ID_FLV1) {
 470         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 471         return -1;
 472     }
 473
 474     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 475         av_log(avctx, AV_LOG_ERROR,
 476                "OBMC is only supported with simple mb decision\n");
 477         return -1;
 478     }
 479
 480     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 481         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 482         return -1;
 483     }
 484
 485     if (s->max_b_frames                    &&
 486         s->codec_id != AV_CODEC_ID_MPEG4      &&
 487         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 488         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 489         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 490         return -1;
 491     }
 492     if (s->max_b_frames < 0) {
 493         av_log(avctx, AV_LOG_ERROR,
 494                "max b frames must be 0 or positive for mpegvideo based encoders\n");
 495         return -1;
 496     }
 497
 498     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 499          s->codec_id == AV_CODEC_ID_H263  ||
 500          s->codec_id == AV_CODEC_ID_H263P) &&
 501         (avctx->sample_aspect_ratio.num > 255 ||
 502          avctx->sample_aspect_ratio.den > 255)) {
 503         av_log(avctx, AV_LOG_WARNING,
 504                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
 505                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 506         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
 507                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
 508     }
 509
 510     if ((s->codec_id == AV_CODEC_ID_H263  ||
 511          s->codec_id == AV_CODEC_ID_H263P) &&
 512         (avctx->width  > 2048 ||
 513          avctx->height > 1152 )) {
 514         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
 515         return -1;
 516     }
 517     if ((s->codec_id == AV_CODEC_ID_H263  ||
 518          s->codec_id == AV_CODEC_ID_H263P) &&
 519         ((avctx->width &3) ||
 520          (avctx->height&3) )) {
 521         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
 522         return -1;
 523     }
 524
 525     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
 526         (avctx->width  > 4095 ||
 527          avctx->height > 4095 )) {
 528         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
 529         return -1;
 530     }
 531
 532     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
 533         (avctx->width  > 16383 ||
 534          avctx->height > 16383 )) {
 535         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
 536         return -1;
 537     }
 538
 539     if (s->codec_id == AV_CODEC_ID_RV10 &&
 540         (avctx->width &15 ||
 541          avctx->height&15 )) {
 542         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
 543         return AVERROR(EINVAL);
 544     }
 545
 546     if (s->codec_id == AV_CODEC_ID_RV20 &&
 547         (avctx->width &3 ||
 548          avctx->height&3 )) {
 549         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
 550         return AVERROR(EINVAL);
 551     }
 552
 553     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
 554          s->codec_id == AV_CODEC_ID_WMV2) &&
 555          avctx->width & 1) {
 556          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
 557          return -1;
 558     }
 559
 560     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 561         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 562         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 563         return -1;
 564     }
 565
 566     // FIXME mpeg2 uses that too
 567     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
 568                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
 569         av_log(avctx, AV_LOG_ERROR,
 570                "mpeg2 style quantization not supported by codec\n");
 571         return -1;
 572     }
 573
 574     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 575         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 576         return -1;
 577     }
 578
 579     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 580         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 581         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 582         return -1;
 583     }
 584
 585     if (s->avctx->scenechange_threshold < 1000000000 &&
 586         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
 587         av_log(avctx, AV_LOG_ERROR,
 588                "closed gop with scene change detection are not supported yet, "
 589                "set threshold to 1000000000\n");
 590         return -1;
 591     }
 592
 593     if (s->flags & CODEC_FLAG_LOW_DELAY) {
 594         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 595             av_log(avctx, AV_LOG_ERROR,
 596                   "low delay forcing is only available for mpeg2\n");
 597             return -1;
 598         }
 599         if (s->max_b_frames != 0) {
 600             av_log(avctx, AV_LOG_ERROR,
 601                    "b frames cannot be used with low delay\n");
 602             return -1;
 603         }
 604     }
 605
 606     if (s->q_scale_type == 1) {
 607         if (avctx->qmax > 12) {
 608             av_log(avctx, AV_LOG_ERROR,
 609                    "non linear quant only supports qmax <= 12 currently\n");
 610             return -1;
 611         }
 612     }
 613
 614     if (s->avctx->thread_count > 1         &&
 615         s->codec_id != AV_CODEC_ID_MPEG4      &&
 616         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 617         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 618         s->codec_id != AV_CODEC_ID_MJPEG      &&
 619         (s->codec_id != AV_CODEC_ID_H263P)) {
 620         av_log(avctx, AV_LOG_ERROR,
 621                "multi threaded encoding not supported by codec\n");
 622         return -1;
 623     }
 624
 625     if (s->avctx->thread_count < 1) {
 626         av_log(avctx, AV_LOG_ERROR,
 627                "automatic thread number detection not supported by codec, "
 628                "patch welcome\n");
 629         return -1;
 630     }
 631
 632     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
 633         s->rtp_mode = 1;
 634
 635     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
 636         s->h263_slice_structured = 1;
 637
 638     if (!avctx->time_base.den || !avctx->time_base.num) {
 639         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 640         return -1;
 641     }
 642
 643     i = (INT_MAX / 2 + 128) >> 8;
 644     if (avctx->mb_threshold >= i) {
 645         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
 646                i - 1);
 647         return -1;
 648     }
 649
 650     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 651         av_log(avctx, AV_LOG_INFO,
 652                "notice: b_frame_strategy only affects the first pass\n");
 653         avctx->b_frame_strategy = 0;
 654     }
 655
 656     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 657     if (i > 1) {
 658         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 659         avctx->time_base.den /= i;
 660         avctx->time_base.num /= i;
 661         //return -1;
 662     }
 663
 664     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
 665         // (a + x * 3 / 8) / x
 666         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 667         s->inter_quant_bias = 0;
 668     } else {
 669         s->intra_quant_bias = 0;
 670         // (a - x / 4) / x
 671         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 672     }
 673
 674     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
 675         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
 676         return AVERROR(EINVAL);
 677     }
 678
 679     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 680         s->intra_quant_bias = avctx->intra_quant_bias;
 681     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 682         s->inter_quant_bias = avctx->inter_quant_bias;
 683
 684     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
 685
 686     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 687         s->avctx->time_base.den > (1 << 16) - 1) {
 688         av_log(avctx, AV_LOG_ERROR,
 689                "timebase %d/%d not supported by MPEG 4 standard, "
 690                "the maximum admitted value for the timebase denominator "
 691                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 692                (1 << 16) - 1);
 693         return -1;
 694     }
 695     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 696
 697     switch (avctx->codec->id) {
 698     case AV_CODEC_ID_MPEG1VIDEO:
 699         s->out_format = FMT_MPEG1;
 700         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 701         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 702         break;
 703     case AV_CODEC_ID_MPEG2VIDEO:
 704         s->out_format = FMT_MPEG1;
 705         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 706         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 707         s->rtp_mode   = 1;
 708         break;
 709     case AV_CODEC_ID_MJPEG:
 710     case AV_CODEC_ID_AMV:
 711         s->out_format = FMT_MJPEG;
 712         s->intra_only = 1; /* force intra only for jpeg */
 713         if (!CONFIG_MJPEG_ENCODER ||
 714             ff_mjpeg_encode_init(s) < 0)
 715             return -1;
 716         avctx->delay = 0;
 717         s->low_delay = 1;
 718         break;
 719     case AV_CODEC_ID_H261:
 720         if (!CONFIG_H261_ENCODER)
 721             return -1;
 722         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 723             av_log(avctx, AV_LOG_ERROR,
 724                    "The specified picture size of %dx%d is not valid for the "
 725                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 726                     s->width, s->height);
 727             return -1;
 728         }
 729         s->out_format = FMT_H261;
 730         avctx->delay  = 0;
 731         s->low_delay  = 1;
 732         break;
 733     case AV_CODEC_ID_H263:
 734         if (!CONFIG_H263_ENCODER)
 735             return -1;
 736         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 737                              s->width, s->height) == 8) {
 738             av_log(avctx, AV_LOG_ERROR,
 739                    "The specified picture size of %dx%d is not valid for "
 740                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 741                    "352x288, 704x576, and 1408x1152. "
 742                    "Try H.263+.\n", s->width, s->height);
 743             return -1;
 744         }
 745         s->out_format = FMT_H263;
 746         avctx->delay  = 0;
 747         s->low_delay  = 1;
 748         break;
 749     case AV_CODEC_ID_H263P:
 750         s->out_format = FMT_H263;
 751         s->h263_plus  = 1;
 752         /* Fx */
 753         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 754         s->modified_quant  = s->h263_aic;
 755         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 756         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 757
 758         /* /Fx */
 759         /* These are just to be sure */
 760         avctx->delay = 0;
 761         s->low_delay = 1;
 762         break;
 763     case AV_CODEC_ID_FLV1:
 764         s->out_format      = FMT_H263;
 765         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 766         s->unrestricted_mv = 1;
 767         s->rtp_mode  = 0; /* don't allow GOB */
 768         avctx->delay = 0;
 769         s->low_delay = 1;
 770         break;
 771     case AV_CODEC_ID_RV10:
 772         s->out_format = FMT_H263;
 773         avctx->delay  = 0;
 774         s->low_delay  = 1;
 775         break;
 776     case AV_CODEC_ID_RV20:
 777         s->out_format      = FMT_H263;
 778         avctx->delay       = 0;
 779         s->low_delay       = 1;
 780         s->modified_quant  = 1;
 781         s->h263_aic        = 1;
 782         s->h263_plus       = 1;
 783         s->loop_filter     = 1;
 784         s->unrestricted_mv = 0;
 785         break;
 786     case AV_CODEC_ID_MPEG4:
 787         s->out_format      = FMT_H263;
 788         s->h263_pred       = 1;
 789         s->unrestricted_mv = 1;
 790         s->low_delay       = s->max_b_frames ? 0 : 1;
 791         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 792         break;
 793     case AV_CODEC_ID_MSMPEG4V2:
 794         s->out_format      = FMT_H263;
 795         s->h263_pred       = 1;
 796         s->unrestricted_mv = 1;
 797         s->msmpeg4_version = 2;
 798         avctx->delay       = 0;
 799         s->low_delay       = 1;
 800         break;
 801     case AV_CODEC_ID_MSMPEG4V3:
 802         s->out_format        = FMT_H263;
 803         s->h263_pred         = 1;
 804         s->unrestricted_mv   = 1;
 805         s->msmpeg4_version   = 3;
 806         s->flipflop_rounding = 1;
 807         avctx->delay         = 0;
 808         s->low_delay         = 1;
 809         break;
 810     case AV_CODEC_ID_WMV1:
 811         s->out_format        = FMT_H263;
 812         s->h263_pred         = 1;
 813         s->unrestricted_mv   = 1;
 814         s->msmpeg4_version   = 4;
 815         s->flipflop_rounding = 1;
 816         avctx->delay         = 0;
 817         s->low_delay         = 1;
 818         break;
 819     case AV_CODEC_ID_WMV2:
 820         s->out_format        = FMT_H263;
 821         s->h263_pred         = 1;
 822         s->unrestricted_mv   = 1;
 823         s->msmpeg4_version   = 5;
 824         s->flipflop_rounding = 1;
 825         avctx->delay         = 0;
 826         s->low_delay         = 1;
 827         break;
 828     default:
 829         return -1;
 830     }
 831
 832     avctx->has_b_frames = !s->low_delay;
 833
 834     s->encoding = 1;
 835
 836     s->progressive_frame    =
 837     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 838                                                 CODEC_FLAG_INTERLACED_ME) ||
 839                                 s->alternate_scan);
 840
 841     /* init */
 842     ff_mpv_idct_init(s);
 843     if (ff_mpv_common_init(s) < 0)
 844         return -1;
 845
 846     ff_fdctdsp_init(&s->fdsp, avctx);
 847     ff_me_cmp_init(&s->mecc, avctx);
 848     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 849     ff_pixblockdsp_init(&s->pdsp, avctx);
 850     ff_qpeldsp_init(&s->qdsp);
 851
 852     s->avctx->coded_frame = s->current_picture.f;
 853
 854     if (s->msmpeg4_version) {
 855         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 856                           2 * 2 * (MAX_LEVEL + 1) *
 857                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 858     }
 859     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 860
 861     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 862     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
 863     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 864     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 865     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 866     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 867     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 868                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 869     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 870                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 871
 872     if (s->avctx->noise_reduction) {
 873         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 874                           2 * 64 * sizeof(uint16_t), fail);
 875     }
 876
 877     ff_dct_encode_init(s);
 878
 879     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 880         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 881
 882     s->quant_precision = 5;
 883
 884     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
 885     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
 886
 887     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 888         ff_h261_encode_init(s);
 889     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 890         ff_h263_encode_init(s);
 891     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 892         ff_msmpeg4_encode_init(s);
 893     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 894         && s->out_format == FMT_MPEG1)
 895         ff_mpeg1_encode_init(s);
 896
 897     /* init q matrix */
 898     for (i = 0; i < 64; i++) {
 899         int j = s->idsp.idct_permutation[i];
 900         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 901             s->mpeg_quant) {
 902             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 903             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 904         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 905             s->intra_matrix[j] =
 906             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 907         } else {
 908             /* mpeg1/2 */
 909             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 910             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 911         }
 912         if (s->avctx->intra_matrix)
 913             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 914         if (s->avctx->inter_matrix)
 915             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 916     }
 917
 918     /* precompute matrix */
 919     /* for mjpeg, we do include qscale in the matrix */
 920     if (s->out_format != FMT_MJPEG) {
 921         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
 922                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 923                           31, 1);
 924         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
 925                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 926                           31, 0);
 927     }
 928
 929     if (ff_rate_control_init(s) < 0)
 930         return -1;
 931
 932 #if FF_API_ERROR_RATE
 933     FF_DISABLE_DEPRECATION_WARNINGS
 934     if (avctx->error_rate)
 935         s->error_rate = avctx->error_rate;
 936     FF_ENABLE_DEPRECATION_WARNINGS;
 937 #endif
 938
 939 #if FF_API_NORMALIZE_AQP
 940     FF_DISABLE_DEPRECATION_WARNINGS
 941     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 942         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 943     FF_ENABLE_DEPRECATION_WARNINGS;
 944 #endif
 945
 946 #if FF_API_MV0
 947     FF_DISABLE_DEPRECATION_WARNINGS
 948     if (avctx->flags & CODEC_FLAG_MV0)
 949         s->mpv_flags |= FF_MPV_FLAG_MV0;
 950     FF_ENABLE_DEPRECATION_WARNINGS
 951 #endif
 952
 953     if (avctx->b_frame_strategy == 2) {
 954         for (i = 0; i < s->max_b_frames + 2; i++) {
 955             s->tmp_frames[i] = av_frame_alloc();
 956             if (!s->tmp_frames[i])
 957                 return AVERROR(ENOMEM);
 958
 959             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 960             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
 961             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
 962
 963             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 964             if (ret < 0)
 965                 return ret;
 966         }
 967     }
 968
 969     return 0;
 970 fail:
 971     ff_mpv_encode_end(avctx);
 972     return AVERROR_UNKNOWN;
 973 }
 974
 975 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
 976 {
 977     MpegEncContext *s = avctx->priv_data;
 978     int i;
 979
 980     ff_rate_control_uninit(s);
 981
 982     ff_mpv_common_end(s);
 983     if (CONFIG_MJPEG_ENCODER &&
 984         s->out_format == FMT_MJPEG)
 985         ff_mjpeg_encode_close(s);
 986
 987     av_freep(&avctx->extradata);
 988
 989     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
 990         av_frame_free(&s->tmp_frames[i]);
 991
 992     ff_free_picture_tables(&s->new_picture);
 993     ff_mpeg_unref_picture(s, &s->new_picture);
 994
 995     av_freep(&s->avctx->stats_out);
 996     av_freep(&s->ac_stats);
 997
 998     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
 999     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1000     s->q_chroma_intra_matrix=   NULL;
1001     s->q_chroma_intra_matrix16= NULL;
1002     av_freep(&s->q_intra_matrix);
1003     av_freep(&s->q_inter_matrix);
1004     av_freep(&s->q_intra_matrix16);
1005     av_freep(&s->q_inter_matrix16);
1006     av_freep(&s->input_picture);
1007     av_freep(&s->reordered_input_picture);
1008     av_freep(&s->dct_offset);
1009
1010     return 0;
1011 }
1012
/**
 * Sum of absolute differences between a 16x16 pixel block and a constant.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 pixels of |pixel - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
1026
1027 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1028                            uint8_t *ref, int stride)
1029 {
1030     int x, y, w, h;
1031     int acc = 0;
1032
1033     w = s->width  & ~15;
1034     h = s->height & ~15;
1035
1036     for (y = 0; y < h; y += 16) {
1037         for (x = 0; x < w; x += 16) {
1038             int offset = x + y * stride;
1039             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1040                                       stride, 16);
1041             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1042             int sae  = get_sae(src + offset, mean, stride);
1043
1044             acc += sae + 500 < sad;
1045         }
1046     }
1047     return acc;
1048 }
1049
1050
1051 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1052 {
1053     Picture *pic = NULL;
1054     int64_t pts;
1055     int i, display_picture_number = 0, ret;
1056     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1057                                                  (s->low_delay ? 0 : 1);
1058     int direct = 1;
1059
1060     if (pic_arg) {
1061         pts = pic_arg->pts;
1062         display_picture_number = s->input_picture_number++;
1063
1064         if (pts != AV_NOPTS_VALUE) {
1065             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1066                 int64_t last = s->user_specified_pts;
1067
1068                 if (pts <= last) {
1069                     av_log(s->avctx, AV_LOG_ERROR,
1070                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1071                            pts, last);
1072                     return AVERROR(EINVAL);
1073                 }
1074
1075                 if (!s->low_delay && display_picture_number == 1)
1076                     s->dts_delta = pts - last;
1077             }
1078             s->user_specified_pts = pts;
1079         } else {
1080             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1081                 s->user_specified_pts =
1082                 pts = s->user_specified_pts + 1;
1083                 av_log(s->avctx, AV_LOG_INFO,
1084                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1085                        pts);
1086             } else {
1087                 pts = display_picture_number;
1088             }
1089         }
1090     }
1091
1092     if (pic_arg) {
1093         if (!pic_arg->buf[0])
1094             direct = 0;
1095         if (pic_arg->linesize[0] != s->linesize)
1096             direct = 0;
1097         if (pic_arg->linesize[1] != s->uvlinesize)
1098             direct = 0;
1099         if (pic_arg->linesize[2] != s->uvlinesize)
1100             direct = 0;
1101         if ((s->width & 15) || (s->height & 15))
1102             direct = 0;
1103         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1104             direct = 0;
1105         if (s->linesize & (STRIDE_ALIGN-1))
1106             direct = 0;
1107
1108         av_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1109                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1110
1111         if (direct) {
1112             i = ff_find_unused_picture(s, 1);
1113             if (i < 0)
1114                 return i;
1115
1116             pic = &s->picture[i];
1117             pic->reference = 3;
1118
1119             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1120                 return ret;
1121             if (ff_alloc_picture(s, pic, 1) < 0) {
1122                 return -1;
1123             }
1124         } else {
1125             i = ff_find_unused_picture(s, 0);
1126             if (i < 0)
1127                 return i;
1128
1129             pic = &s->picture[i];
1130             pic->reference = 3;
1131
1132             if (ff_alloc_picture(s, pic, 0) < 0) {
1133                 return -1;
1134             }
1135
1136             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1137                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1138                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1139                 // empty
1140             } else {
1141                 int h_chroma_shift, v_chroma_shift;
1142                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1143                                                  &h_chroma_shift,
1144                                                  &v_chroma_shift);
1145
1146                 for (i = 0; i < 3; i++) {
1147                     int src_stride = pic_arg->linesize[i];
1148                     int dst_stride = i ? s->uvlinesize : s->linesize;
1149                     int h_shift = i ? h_chroma_shift : 0;
1150                     int v_shift = i ? v_chroma_shift : 0;
1151                     int w = s->width  >> h_shift;
1152                     int h = s->height >> v_shift;
1153                     uint8_t *src = pic_arg->data[i];
1154                     uint8_t *dst = pic->f->data[i];
1155                     int vpad = 16;
1156
1157                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1158                         && !s->progressive_sequence
1159                         && FFALIGN(s->height, 32) - s->height > 16)
1160                         vpad = 32;
1161
1162                     if (!s->avctx->rc_buffer_size)
1163                         dst += INPLACE_OFFSET;
1164
1165                     if (src_stride == dst_stride)
1166                         memcpy(dst, src, src_stride * h);
1167                     else {
1168                         int h2 = h;
1169                         uint8_t *dst2 = dst;
1170                         while (h2--) {
1171                             memcpy(dst2, src, w);
1172                             dst2 += dst_stride;
1173                             src += src_stride;
1174                         }
1175                     }
1176                     if ((s->width & 15) || (s->height & (vpad-1))) {
1177                         s->mpvencdsp.draw_edges(dst, dst_stride,
1178                                                 w, h,
1179                                                 16>>h_shift,
1180                                                 vpad>>v_shift,
1181                                                 EDGE_BOTTOM);
1182                     }
1183                 }
1184             }
1185         }
1186         ret = av_frame_copy_props(pic->f, pic_arg);
1187         if (ret < 0)
1188             return ret;
1189
1190         pic->f->display_picture_number = display_picture_number;
1191         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1192     }
1193
1194     /* shift buffer entries */
1195     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1196         s->input_picture[i - 1] = s->input_picture[i];
1197
1198     s->input_picture[encoding_delay] = (Picture*) pic;
1199
1200     return 0;
1201 }
1202
1203 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1204 {
1205     int x, y, plane;
1206     int score = 0;
1207     int64_t score64 = 0;
1208
1209     for (plane = 0; plane < 3; plane++) {
1210         const int stride = p->f->linesize[plane];
1211         const int bw = plane ? 1 : 2;
1212         for (y = 0; y < s->mb_height * bw; y++) {
1213             for (x = 0; x < s->mb_width * bw; x++) {
1214                 int off = p->shared ? 0 : 16;
1215                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1216                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1217                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1218
1219                 switch (FFABS(s->avctx->frame_skip_exp)) {
1220                 case 0: score    =  FFMAX(score, v);          break;
1221                 case 1: score   += FFABS(v);                  break;
1222                 case 2: score64 += v * (int64_t)v;                       break;
1223                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1224                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1225                 }
1226             }
1227         }
1228     }
1229     emms_c();
1230
1231     if (score)
1232         score64 = score;
1233     if (s->avctx->frame_skip_exp < 0)
1234         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1235                       -1.0/s->avctx->frame_skip_exp);
1236
1237     if (score64 < s->avctx->frame_skip_threshold)
1238         return 1;
1239     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1240         return 1;
1241     return 0;
1242 }
1243
1244 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1245 {
1246     AVPacket pkt = { 0 };
1247     int ret, got_output;
1248
1249     av_init_packet(&pkt);
1250     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1251     if (ret < 0)
1252         return ret;
1253
1254     ret = pkt.size;
1255     av_free_packet(&pkt);
1256     return ret;
1257 }
1258
1259 static int estimate_best_b_count(MpegEncContext *s)
1260 {
1261     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1262     AVCodecContext *c = avcodec_alloc_context3(NULL);
1263     const int scale = s->avctx->brd_scale;
1264     int i, j, out_size, p_lambda, b_lambda, lambda2;
1265     int64_t best_rd  = INT64_MAX;
1266     int best_b_count = -1;
1267
1268     av_assert0(scale >= 0 && scale <= 3);
1269
1270     //emms_c();
1271     //s->next_picture_ptr->quality;
1272     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1273     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1274     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1275     if (!b_lambda) // FIXME we should do this somewhere else
1276         b_lambda = p_lambda;
1277     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1278                FF_LAMBDA_SHIFT;
1279
1280     c->width        = s->width  >> scale;
1281     c->height       = s->height >> scale;
1282     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1283     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1284     c->mb_decision  = s->avctx->mb_decision;
1285     c->me_cmp       = s->avctx->me_cmp;
1286     c->mb_cmp       = s->avctx->mb_cmp;
1287     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1288     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1289     c->time_base    = s->avctx->time_base;
1290     c->max_b_frames = s->max_b_frames;
1291
1292     if (avcodec_open2(c, codec, NULL) < 0)
1293         return -1;
1294
1295     for (i = 0; i < s->max_b_frames + 2; i++) {
1296         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1297                                                 s->next_picture_ptr;
1298         uint8_t *data[4];
1299
1300         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1301             pre_input = *pre_input_ptr;
1302             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1303
1304             if (!pre_input.shared && i) {
1305                 data[0] += INPLACE_OFFSET;
1306                 data[1] += INPLACE_OFFSET;
1307                 data[2] += INPLACE_OFFSET;
1308             }
1309
1310             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1311                                        s->tmp_frames[i]->linesize[0],
1312                                        data[0],
1313                                        pre_input.f->linesize[0],
1314                                        c->width, c->height);
1315             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1316                                        s->tmp_frames[i]->linesize[1],
1317                                        data[1],
1318                                        pre_input.f->linesize[1],
1319                                        c->width >> 1, c->height >> 1);
1320             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1321                                        s->tmp_frames[i]->linesize[2],
1322                                        data[2],
1323                                        pre_input.f->linesize[2],
1324                                        c->width >> 1, c->height >> 1);
1325         }
1326     }
1327
1328     for (j = 0; j < s->max_b_frames + 1; j++) {
1329         int64_t rd = 0;
1330
1331         if (!s->input_picture[j])
1332             break;
1333
1334         c->error[0] = c->error[1] = c->error[2] = 0;
1335
1336         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1337         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1338
1339         out_size = encode_frame(c, s->tmp_frames[0]);
1340
1341         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1342
1343         for (i = 0; i < s->max_b_frames + 1; i++) {
1344             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1345
1346             s->tmp_frames[i + 1]->pict_type = is_p ?
1347                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1348             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1349
1350             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1351
1352             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1353         }
1354
1355         /* get the delayed frames */
1356         while (out_size) {
1357             out_size = encode_frame(c, NULL);
1358             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1359         }
1360
1361         rd += c->error[0] + c->error[1] + c->error[2];
1362
1363         if (rd < best_rd) {
1364             best_rd = rd;
1365             best_b_count = j;
1366         }
1367     }
1368
1369     avcodec_close(c);
1370     av_freep(&c);
1371
1372     return best_b_count;
1373 }
1374
1375 static int select_input_picture(MpegEncContext *s)
1376 {
1377     int i, ret;
1378
1379     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1380         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1381     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1382
1383     /* set next picture type & ordering */
1384     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1385         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1386             if (s->picture_in_gop_number < s->gop_size &&
1387                 s->next_picture_ptr &&
1388                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1389                 // FIXME check that te gop check above is +-1 correct
1390                 av_frame_unref(s->input_picture[0]->f);
1391
1392                 ff_vbv_update(s, 0);
1393
1394                 goto no_output_pic;
1395             }
1396         }
1397
1398         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1399             !s->next_picture_ptr || s->intra_only) {
1400             s->reordered_input_picture[0] = s->input_picture[0];
1401             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1402             s->reordered_input_picture[0]->f->coded_picture_number =
1403                 s->coded_picture_number++;
1404         } else {
1405             int b_frames;
1406
1407             if (s->flags & CODEC_FLAG_PASS2) {
1408                 for (i = 0; i < s->max_b_frames + 1; i++) {
1409                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1410
1411                     if (pict_num >= s->rc_context.num_entries)
1412                         break;
1413                     if (!s->input_picture[i]) {
1414                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1415                         break;
1416                     }
1417
1418                     s->input_picture[i]->f->pict_type =
1419                         s->rc_context.entry[pict_num].new_pict_type;
1420                 }
1421             }
1422
1423             if (s->avctx->b_frame_strategy == 0) {
1424                 b_frames = s->max_b_frames;
1425                 while (b_frames && !s->input_picture[b_frames])
1426                     b_frames--;
1427             } else if (s->avctx->b_frame_strategy == 1) {
1428                 for (i = 1; i < s->max_b_frames + 1; i++) {
1429                     if (s->input_picture[i] &&
1430                         s->input_picture[i]->b_frame_score == 0) {
1431                         s->input_picture[i]->b_frame_score =
1432                             get_intra_count(s,
1433                                             s->input_picture[i    ]->f->data[0],
1434                                             s->input_picture[i - 1]->f->data[0],
1435                                             s->linesize) + 1;
1436                     }
1437                 }
1438                 for (i = 0; i < s->max_b_frames + 1; i++) {
1439                     if (!s->input_picture[i] ||
1440                         s->input_picture[i]->b_frame_score - 1 >
1441                             s->mb_num / s->avctx->b_sensitivity)
1442                         break;
1443                 }
1444
1445                 b_frames = FFMAX(0, i - 1);
1446
1447                 /* reset scores */
1448                 for (i = 0; i < b_frames + 1; i++) {
1449                     s->input_picture[i]->b_frame_score = 0;
1450                 }
1451             } else if (s->avctx->b_frame_strategy == 2) {
1452                 b_frames = estimate_best_b_count(s);
1453             } else {
1454                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1455                 b_frames = 0;
1456             }
1457
1458             emms_c();
1459
1460             for (i = b_frames - 1; i >= 0; i--) {
1461                 int type = s->input_picture[i]->f->pict_type;
1462                 if (type && type != AV_PICTURE_TYPE_B)
1463                     b_frames = i;
1464             }
1465             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1466                 b_frames == s->max_b_frames) {
1467                 av_log(s->avctx, AV_LOG_ERROR,
1468                        "warning, too many b frames in a row\n");
1469             }
1470
1471             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1472                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1473                     s->gop_size > s->picture_in_gop_number) {
1474                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1475                 } else {
1476                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1477                         b_frames = 0;
1478                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1479                 }
1480             }
1481
1482             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1483                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1484                 b_frames--;
1485
1486             s->reordered_input_picture[0] = s->input_picture[b_frames];
1487             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1488                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1489             s->reordered_input_picture[0]->f->coded_picture_number =
1490                 s->coded_picture_number++;
1491             for (i = 0; i < b_frames; i++) {
1492                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1493                 s->reordered_input_picture[i + 1]->f->pict_type =
1494                     AV_PICTURE_TYPE_B;
1495                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1496                     s->coded_picture_number++;
1497             }
1498         }
1499     }
1500 no_output_pic:
1501     if (s->reordered_input_picture[0]) {
1502         s->reordered_input_picture[0]->reference =
1503            s->reordered_input_picture[0]->f->pict_type !=
1504                AV_PICTURE_TYPE_B ? 3 : 0;
1505
1506         ff_mpeg_unref_picture(s, &s->new_picture);
1507         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1508             return ret;
1509
1510         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1511             // input is a shared pix, so we can't modifiy it -> alloc a new
1512             // one & ensure that the shared one is reuseable
1513
1514             Picture *pic;
1515             int i = ff_find_unused_picture(s, 0);
1516             if (i < 0)
1517                 return i;
1518             pic = &s->picture[i];
1519
1520             pic->reference = s->reordered_input_picture[0]->reference;
1521             if (ff_alloc_picture(s, pic, 0) < 0) {
1522                 return -1;
1523             }
1524
1525             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1526             if (ret < 0)
1527                 return ret;
1528
1529             /* mark us unused / free shared pic */
1530             av_frame_unref(s->reordered_input_picture[0]->f);
1531             s->reordered_input_picture[0]->shared = 0;
1532
1533             s->current_picture_ptr = pic;
1534         } else {
1535             // input is not a shared pix -> reuse buffer for current_pix
1536             s->current_picture_ptr = s->reordered_input_picture[0];
1537             for (i = 0; i < 4; i++) {
1538                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1539             }
1540         }
1541         ff_mpeg_unref_picture(s, &s->current_picture);
1542         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1543                                        s->current_picture_ptr)) < 0)
1544             return ret;
1545
1546         s->picture_number = s->new_picture.f->display_picture_number;
1547     } else {
1548         ff_mpeg_unref_picture(s, &s->new_picture);
1549     }
1550     return 0;
1551 }
1552
1553 static void frame_end(MpegEncContext *s)
1554 {
1555     if (s->unrestricted_mv &&
1556         s->current_picture.reference &&
1557         !s->intra_only) {
1558         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1559         int hshift = desc->log2_chroma_w;
1560         int vshift = desc->log2_chroma_h;
1561         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1562                                 s->current_picture.f->linesize[0],
1563                                 s->h_edge_pos, s->v_edge_pos,
1564                                 EDGE_WIDTH, EDGE_WIDTH,
1565                                 EDGE_TOP | EDGE_BOTTOM);
1566         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1567                                 s->current_picture.f->linesize[1],
1568                                 s->h_edge_pos >> hshift,
1569                                 s->v_edge_pos >> vshift,
1570                                 EDGE_WIDTH >> hshift,
1571                                 EDGE_WIDTH >> vshift,
1572                                 EDGE_TOP | EDGE_BOTTOM);
1573         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1574                                 s->current_picture.f->linesize[2],
1575                                 s->h_edge_pos >> hshift,
1576                                 s->v_edge_pos >> vshift,
1577                                 EDGE_WIDTH >> hshift,
1578                                 EDGE_WIDTH >> vshift,
1579                                 EDGE_TOP | EDGE_BOTTOM);
1580     }
1581
1582     emms_c();
1583
1584     s->last_pict_type                 = s->pict_type;
1585     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1586     if (s->pict_type!= AV_PICTURE_TYPE_B)
1587         s->last_non_b_pict_type = s->pict_type;
1588
1589     s->avctx->coded_frame = s->current_picture_ptr->f;
1590
1591 }
1592
1593 static void update_noise_reduction(MpegEncContext *s)
1594 {
1595     int intra, i;
1596
1597     for (intra = 0; intra < 2; intra++) {
1598         if (s->dct_count[intra] > (1 << 16)) {
1599             for (i = 0; i < 64; i++) {
1600                 s->dct_error_sum[intra][i] >>= 1;
1601             }
1602             s->dct_count[intra] >>= 1;
1603         }
1604
1605         for (i = 0; i < 64; i++) {
1606             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1607                                        s->dct_count[intra] +
1608                                        s->dct_error_sum[intra][i] / 2) /
1609                                       (s->dct_error_sum[intra][i] + 1);
1610         }
1611     }
1612 }
1613
1614 static int frame_start(MpegEncContext *s)
1615 {
1616     int ret;
1617
1618     /* mark & release old frames */
1619     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1620         s->last_picture_ptr != s->next_picture_ptr &&
1621         s->last_picture_ptr->f->buf[0]) {
1622         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1623     }
1624
1625     s->current_picture_ptr->f->pict_type = s->pict_type;
1626     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1627
1628     ff_mpeg_unref_picture(s, &s->current_picture);
1629     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1630                                    s->current_picture_ptr)) < 0)
1631         return ret;
1632
1633     if (s->pict_type != AV_PICTURE_TYPE_B) {
1634         s->last_picture_ptr = s->next_picture_ptr;
1635         if (!s->droppable)
1636             s->next_picture_ptr = s->current_picture_ptr;
1637     }
1638
1639     if (s->last_picture_ptr) {
1640         ff_mpeg_unref_picture(s, &s->last_picture);
1641         if (s->last_picture_ptr->f->buf[0] &&
1642             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1643                                        s->last_picture_ptr)) < 0)
1644             return ret;
1645     }
1646     if (s->next_picture_ptr) {
1647         ff_mpeg_unref_picture(s, &s->next_picture);
1648         if (s->next_picture_ptr->f->buf[0] &&
1649             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1650                                        s->next_picture_ptr)) < 0)
1651             return ret;
1652     }
1653
1654     if (s->picture_structure!= PICT_FRAME) {
1655         int i;
1656         for (i = 0; i < 4; i++) {
1657             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1658                 s->current_picture.f->data[i] +=
1659                     s->current_picture.f->linesize[i];
1660             }
1661             s->current_picture.f->linesize[i] *= 2;
1662             s->last_picture.f->linesize[i]    *= 2;
1663             s->next_picture.f->linesize[i]    *= 2;
1664         }
1665     }
1666
1667     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1668         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1669         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1670     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1671         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1672         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1673     } else {
1674         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1675         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1676     }
1677
1678     if (s->dct_error_sum) {
1679         av_assert2(s->avctx->noise_reduction && s->encoding);
1680         update_noise_reduction(s);
1681     }
1682
1683     return 0;
1684 }
1685
1686 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1687                           const AVFrame *pic_arg, int *got_packet)
1688 {
1689     MpegEncContext *s = avctx->priv_data;
1690     int i, stuffing_count, ret;
1691     int context_count = s->slice_context_count;
1692
1693     s->picture_in_gop_number++;
1694
1695     if (load_input_picture(s, pic_arg) < 0)
1696         return -1;
1697
1698     if (select_input_picture(s) < 0) {
1699         return -1;
1700     }
1701
1702     /* output? */
1703     if (s->new_picture.f->data[0]) {
1704         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1705         int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - FF_INPUT_BUFFER_PADDING_SIZE
1706                                               :
1707                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1708         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
1709             return ret;
1710         if (s->mb_info) {
1711             s->mb_info_ptr = av_packet_new_side_data(pkt,
1712                                  AV_PKT_DATA_H263_MB_INFO,
1713                                  s->mb_width*s->mb_height*12);
1714             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1715         }
1716
1717         for (i = 0; i < context_count; i++) {
1718             int start_y = s->thread_context[i]->start_mb_y;
1719             int   end_y = s->thread_context[i]->  end_mb_y;
1720             int h       = s->mb_height;
1721             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1722             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1723
1724             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1725         }
1726
1727         s->pict_type = s->new_picture.f->pict_type;
1728         //emms_c();
1729         ret = frame_start(s);
1730         if (ret < 0)
1731             return ret;
1732 vbv_retry:
1733         ret = encode_picture(s, s->picture_number);
1734         if (growing_buffer) {
1735             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1736             pkt->data = s->pb.buf;
1737             pkt->size = avctx->internal->byte_buffer_size;
1738         }
1739         if (ret < 0)
1740             return -1;
1741
1742         avctx->header_bits = s->header_bits;
1743         avctx->mv_bits     = s->mv_bits;
1744         avctx->misc_bits   = s->misc_bits;
1745         avctx->i_tex_bits  = s->i_tex_bits;
1746         avctx->p_tex_bits  = s->p_tex_bits;
1747         avctx->i_count     = s->i_count;
1748         // FIXME f/b_count in avctx
1749         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1750         avctx->skip_count  = s->skip_count;
1751
1752         frame_end(s);
1753
1754         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1755             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1756
1757         if (avctx->rc_buffer_size) {
1758             RateControlContext *rcc = &s->rc_context;
1759             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1760
1761             if (put_bits_count(&s->pb) > max_size &&
1762                 s->lambda < s->avctx->lmax) {
1763                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1764                                        (s->qscale + 1) / s->qscale);
1765                 if (s->adaptive_quant) {
1766                     int i;
1767                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1768                         s->lambda_table[i] =
1769                             FFMAX(s->lambda_table[i] + 1,
1770                                   s->lambda_table[i] * (s->qscale + 1) /
1771                                   s->qscale);
1772                 }
1773                 s->mb_skipped = 0;        // done in frame_start()
1774                 // done in encode_picture() so we must undo it
1775                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1776                     if (s->flipflop_rounding          ||
1777                         s->codec_id == AV_CODEC_ID_H263P ||
1778                         s->codec_id == AV_CODEC_ID_MPEG4)
1779                         s->no_rounding ^= 1;
1780                 }
1781                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1782                     s->time_base       = s->last_time_base;
1783                     s->last_non_b_time = s->time - s->pp_time;
1784                 }
1785                 for (i = 0; i < context_count; i++) {
1786                     PutBitContext *pb = &s->thread_context[i]->pb;
1787                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1788                 }
1789                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1790                 goto vbv_retry;
1791             }
1792
1793             av_assert0(s->avctx->rc_max_rate);
1794         }
1795
1796         if (s->flags & CODEC_FLAG_PASS1)
1797             ff_write_pass1_stats(s);
1798
1799         for (i = 0; i < 4; i++) {
1800             s->current_picture_ptr->f->error[i] =
1801             s->current_picture.f->error[i] =
1802                 s->current_picture.error[i];
1803             avctx->error[i] += s->current_picture_ptr->f->error[i];
1804         }
1805
1806         if (s->flags & CODEC_FLAG_PASS1)
1807             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1808                    avctx->i_tex_bits + avctx->p_tex_bits ==
1809                        put_bits_count(&s->pb));
1810         flush_put_bits(&s->pb);
1811         s->frame_bits  = put_bits_count(&s->pb);
1812
1813         stuffing_count = ff_vbv_update(s, s->frame_bits);
1814         s->stuffing_bits = 8*stuffing_count;
1815         if (stuffing_count) {
1816             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1817                     stuffing_count + 50) {
1818                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1819                 return -1;
1820             }
1821
1822             switch (s->codec_id) {
1823             case AV_CODEC_ID_MPEG1VIDEO:
1824             case AV_CODEC_ID_MPEG2VIDEO:
1825                 while (stuffing_count--) {
1826                     put_bits(&s->pb, 8, 0);
1827                 }
1828             break;
1829             case AV_CODEC_ID_MPEG4:
1830                 put_bits(&s->pb, 16, 0);
1831                 put_bits(&s->pb, 16, 0x1C3);
1832                 stuffing_count -= 4;
1833                 while (stuffing_count--) {
1834                     put_bits(&s->pb, 8, 0xFF);
1835                 }
1836             break;
1837             default:
1838                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1839             }
1840             flush_put_bits(&s->pb);
1841             s->frame_bits  = put_bits_count(&s->pb);
1842         }
1843
1844         /* update mpeg1/2 vbv_delay for CBR */
1845         if (s->avctx->rc_max_rate                          &&
1846             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1847             s->out_format == FMT_MPEG1                     &&
1848             90000LL * (avctx->rc_buffer_size - 1) <=
1849                 s->avctx->rc_max_rate * 0xFFFFLL) {
1850             int vbv_delay, min_delay;
1851             double inbits  = s->avctx->rc_max_rate *
1852                              av_q2d(s->avctx->time_base);
1853             int    minbits = s->frame_bits - 8 *
1854                              (s->vbv_delay_ptr - s->pb.buf - 1);
1855             double bits    = s->rc_context.buffer_index + minbits - inbits;
1856
1857             if (bits < 0)
1858                 av_log(s->avctx, AV_LOG_ERROR,
1859                        "Internal error, negative bits\n");
1860
1861             assert(s->repeat_first_field == 0);
1862
1863             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1864             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1865                         s->avctx->rc_max_rate;
1866
1867             vbv_delay = FFMAX(vbv_delay, min_delay);
1868
1869             av_assert0(vbv_delay < 0xFFFF);
1870
1871             s->vbv_delay_ptr[0] &= 0xF8;
1872             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1873             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1874             s->vbv_delay_ptr[2] &= 0x07;
1875             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1876             avctx->vbv_delay     = vbv_delay * 300;
1877         }
1878         s->total_bits     += s->frame_bits;
1879         avctx->frame_bits  = s->frame_bits;
1880
1881         pkt->pts = s->current_picture.f->pts;
1882         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1883             if (!s->current_picture.f->coded_picture_number)
1884                 pkt->dts = pkt->pts - s->dts_delta;
1885             else
1886                 pkt->dts = s->reordered_pts;
1887             s->reordered_pts = pkt->pts;
1888         } else
1889             pkt->dts = pkt->pts;
1890         if (s->current_picture.f->key_frame)
1891             pkt->flags |= AV_PKT_FLAG_KEY;
1892         if (s->mb_info)
1893             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1894     } else {
1895         s->frame_bits = 0;
1896     }
1897
1898     /* release non-reference frames */
1899     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1900         if (!s->picture[i].reference)
1901             ff_mpeg_unref_picture(s, &s->picture[i]);
1902     }
1903
1904     av_assert1((s->frame_bits & 7) == 0);
1905
1906     pkt->size = s->frame_bits / 8;
1907     *got_packet = !!pkt->size;
1908     return 0;
1909 }
1910
1911 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1912                                                 int n, int threshold)
1913 {
1914     static const char tab[64] = {
1915         3, 2, 2, 1, 1, 1, 1, 1,
1916         1, 1, 1, 1, 1, 1, 1, 1,
1917         1, 1, 1, 1, 1, 1, 1, 1,
1918         0, 0, 0, 0, 0, 0, 0, 0,
1919         0, 0, 0, 0, 0, 0, 0, 0,
1920         0, 0, 0, 0, 0, 0, 0, 0,
1921         0, 0, 0, 0, 0, 0, 0, 0,
1922         0, 0, 0, 0, 0, 0, 0, 0
1923     };
1924     int score = 0;
1925     int run = 0;
1926     int i;
1927     int16_t *block = s->block[n];
1928     const int last_index = s->block_last_index[n];
1929     int skip_dc;
1930
1931     if (threshold < 0) {
1932         skip_dc = 0;
1933         threshold = -threshold;
1934     } else
1935         skip_dc = 1;
1936
1937     /* Are all we could set to zero already zero? */
1938     if (last_index <= skip_dc - 1)
1939         return;
1940
1941     for (i = 0; i <= last_index; i++) {
1942         const int j = s->intra_scantable.permutated[i];
1943         const int level = FFABS(block[j]);
1944         if (level == 1) {
1945             if (skip_dc && i == 0)
1946                 continue;
1947             score += tab[run];
1948             run = 0;
1949         } else if (level > 1) {
1950             return;
1951         } else {
1952             run++;
1953         }
1954     }
1955     if (score >= threshold)
1956         return;
1957     for (i = skip_dc; i <= last_index; i++) {
1958         const int j = s->intra_scantable.permutated[i];
1959         block[j] = 0;
1960     }
1961     if (block[0])
1962         s->block_last_index[n] = 0;
1963     else
1964         s->block_last_index[n] = -1;
1965 }
1966
1967 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1968                                int last_index)
1969 {
1970     int i;
1971     const int maxlevel = s->max_qcoeff;
1972     const int minlevel = s->min_qcoeff;
1973     int overflow = 0;
1974
1975     if (s->mb_intra) {
1976         i = 1; // skip clipping of intra dc
1977     } else
1978         i = 0;
1979
1980     for (; i <= last_index; i++) {
1981         const int j = s->intra_scantable.permutated[i];
1982         int level = block[j];
1983
1984         if (level > maxlevel) {
1985             level = maxlevel;
1986             overflow++;
1987         } else if (level < minlevel) {
1988             level = minlevel;
1989             overflow++;
1990         }
1991
1992         block[j] = level;
1993     }
1994
1995     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1996         av_log(s->avctx, AV_LOG_INFO,
1997                "warning, clipping %d dct coefficients to %d..%d\n",
1998                overflow, minlevel, maxlevel);
1999 }
2000
/**
 * Compute a per-pixel weight for an 8x8 block from local pixel variation.
 *
 * For each pixel the sum and sum of squares of its neighbourhood (up to
 * 3x3, cut off at the block borders) are gathered, and the weight is
 *   36 * ff_sqrt(count * sqr - sum * sum) / count.
 *
 * @param weight output, 64 entries stored row by row (index x + 8 * y)
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between vertically adjacent pixels
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left    = FFMAX(col - 1, 0);
            const int right   = FFMIN(8, col + 2);
            /* Number of pixels in the clipped neighbourhood. */
            const int samples = (bottom - top) * (right - left);
            int total    = 0;
            int total_sq = 0;
            int nx, ny;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int pix = line[nx];

                    total    += pix;
                    total_sq += pix * pix;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * total_sq - total * total)) / samples;
        }
    }
}
2024
2025 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2026                                                 int motion_x, int motion_y,
2027                                                 int mb_block_height,
2028                                                 int mb_block_width,
2029                                                 int mb_block_count)
2030 {
2031     int16_t weight[12][64];
2032     int16_t orig[12][64];
2033     const int mb_x = s->mb_x;
2034     const int mb_y = s->mb_y;
2035     int i;
2036     int skip_dct[12];
2037     int dct_offset = s->linesize * 8; // default for progressive frames
2038     int uv_dct_offset = s->uvlinesize * 8;
2039     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2040     ptrdiff_t wrap_y, wrap_c;
2041
2042     for (i = 0; i < mb_block_count; i++)
2043         skip_dct[i] = s->skipdct;
2044
2045     if (s->adaptive_quant) {
2046         const int last_qp = s->qscale;
2047         const int mb_xy = mb_x + mb_y * s->mb_stride;
2048
2049         s->lambda = s->lambda_table[mb_xy];
2050         update_qscale(s);
2051
2052         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2053             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2054             s->dquant = s->qscale - last_qp;
2055
2056             if (s->out_format == FMT_H263) {
2057                 s->dquant = av_clip(s->dquant, -2, 2);
2058
2059                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2060                     if (!s->mb_intra) {
2061                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2062                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2063                                 s->dquant = 0;
2064                         }
2065                         if (s->mv_type == MV_TYPE_8X8)
2066                             s->dquant = 0;
2067                     }
2068                 }
2069             }
2070         }
2071         ff_set_qscale(s, last_qp + s->dquant);
2072     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2073         ff_set_qscale(s, s->qscale + s->dquant);
2074
2075     wrap_y = s->linesize;
2076     wrap_c = s->uvlinesize;
2077     ptr_y  = s->new_picture.f->data[0] +
2078              (mb_y * 16 * wrap_y)              + mb_x * 16;
2079     ptr_cb = s->new_picture.f->data[1] +
2080              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2081     ptr_cr = s->new_picture.f->data[2] +
2082              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2083
2084     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2085         uint8_t *ebuf = s->edge_emu_buffer + 36 * wrap_y;
2086         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2087         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2088         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2089                                  wrap_y, wrap_y,
2090                                  16, 16, mb_x * 16, mb_y * 16,
2091                                  s->width, s->height);
2092         ptr_y = ebuf;
2093         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2094                                  wrap_c, wrap_c,
2095                                  mb_block_width, mb_block_height,
2096                                  mb_x * mb_block_width, mb_y * mb_block_height,
2097                                  cw, ch);
2098         ptr_cb = ebuf + 16 * wrap_y;
2099         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2100                                  wrap_c, wrap_c,
2101                                  mb_block_width, mb_block_height,
2102                                  mb_x * mb_block_width, mb_y * mb_block_height,
2103                                  cw, ch);
2104         ptr_cr = ebuf + 16 * wrap_y + 16;
2105     }
2106
2107     if (s->mb_intra) {
2108         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2109             int progressive_score, interlaced_score;
2110
2111             s->interlaced_dct = 0;
2112             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2113                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2114                                                      NULL, wrap_y, 8) - 400;
2115
2116             if (progressive_score > 0) {
2117                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2118                                                         NULL, wrap_y * 2, 8) +
2119                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2120                                                         NULL, wrap_y * 2, 8);
2121                 if (progressive_score > interlaced_score) {
2122                     s->interlaced_dct = 1;
2123
2124                     dct_offset = wrap_y;
2125                     uv_dct_offset = wrap_c;
2126                     wrap_y <<= 1;
2127                     if (s->chroma_format == CHROMA_422 ||
2128                         s->chroma_format == CHROMA_444)
2129                         wrap_c <<= 1;
2130                 }
2131             }
2132         }
2133
2134         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2135         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2136         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2137         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2138
2139         if (s->flags & CODEC_FLAG_GRAY) {
2140             skip_dct[4] = 1;
2141             skip_dct[5] = 1;
2142         } else {
2143             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2144             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2145             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2146                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2147                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2148             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2149                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2150                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2151                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2152                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2153                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2154                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2155             }
2156         }
2157     } else {
2158         op_pixels_func (*op_pix)[4];
2159         qpel_mc_func (*op_qpix)[16];
2160         uint8_t *dest_y, *dest_cb, *dest_cr;
2161
2162         dest_y  = s->dest[0];
2163         dest_cb = s->dest[1];
2164         dest_cr = s->dest[2];
2165
2166         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2167             op_pix  = s->hdsp.put_pixels_tab;
2168             op_qpix = s->qdsp.put_qpel_pixels_tab;
2169         } else {
2170             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2171             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2172         }
2173
2174         if (s->mv_dir & MV_DIR_FORWARD) {
2175             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2176                           s->last_picture.f->data,
2177                           op_pix, op_qpix);
2178             op_pix  = s->hdsp.avg_pixels_tab;
2179             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2180         }
2181         if (s->mv_dir & MV_DIR_BACKWARD) {
2182             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2183                           s->next_picture.f->data,
2184                           op_pix, op_qpix);
2185         }
2186
2187         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2188             int progressive_score, interlaced_score;
2189
2190             s->interlaced_dct = 0;
2191             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2192                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2193                                                      ptr_y + wrap_y * 8,
2194                                                      wrap_y, 8) - 400;
2195
2196             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2197                 progressive_score -= 400;
2198
2199             if (progressive_score > 0) {
2200                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2201                                                         wrap_y * 2, 8) +
2202                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2203                                                         ptr_y + wrap_y,
2204                                                         wrap_y * 2, 8);
2205
2206                 if (progressive_score > interlaced_score) {
2207                     s->interlaced_dct = 1;
2208
2209                     dct_offset = wrap_y;
2210                     uv_dct_offset = wrap_c;
2211                     wrap_y <<= 1;
2212                     if (s->chroma_format == CHROMA_422)
2213                         wrap_c <<= 1;
2214                 }
2215             }
2216         }
2217
2218         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2219         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2220         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2221                             dest_y + dct_offset, wrap_y);
2222         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2223                             dest_y + dct_offset + 8, wrap_y);
2224
2225         if (s->flags & CODEC_FLAG_GRAY) {
2226             skip_dct[4] = 1;
2227             skip_dct[5] = 1;
2228         } else {
2229             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2230             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2231             if (!s->chroma_y_shift) { /* 422 */
2232                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2233                                     dest_cb + uv_dct_offset, wrap_c);
2234                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2235                                     dest_cr + uv_dct_offset, wrap_c);
2236             }
2237         }
2238         /* pre quantization */
2239         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2240                 2 * s->qscale * s->qscale) {
2241             // FIXME optimize
2242             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2243                 skip_dct[0] = 1;
2244             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2245                 skip_dct[1] = 1;
2246             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2247                                wrap_y, 8) < 20 * s->qscale)
2248                 skip_dct[2] = 1;
2249             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2250                                wrap_y, 8) < 20 * s->qscale)
2251                 skip_dct[3] = 1;
2252             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2253                 skip_dct[4] = 1;
2254             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2255                 skip_dct[5] = 1;
2256             if (!s->chroma_y_shift) { /* 422 */
2257                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2258                                    dest_cb + uv_dct_offset,
2259                                    wrap_c, 8) < 20 * s->qscale)
2260                     skip_dct[6] = 1;
2261                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2262                                    dest_cr + uv_dct_offset,
2263                                    wrap_c, 8) < 20 * s->qscale)
2264                     skip_dct[7] = 1;
2265             }
2266         }
2267     }
2268
2269     if (s->quantizer_noise_shaping) {
2270         if (!skip_dct[0])
2271             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2272         if (!skip_dct[1])
2273             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2274         if (!skip_dct[2])
2275             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2276         if (!skip_dct[3])
2277             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2278         if (!skip_dct[4])
2279             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2280         if (!skip_dct[5])
2281             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2282         if (!s->chroma_y_shift) { /* 422 */
2283             if (!skip_dct[6])
2284                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2285                                   wrap_c);
2286             if (!skip_dct[7])
2287                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2288                                   wrap_c);
2289         }
2290         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2291     }
2292
2293     /* DCT & quantize */
2294     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2295     {
2296         for (i = 0; i < mb_block_count; i++) {
2297             if (!skip_dct[i]) {
2298                 int overflow;
2299                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2300                 // FIXME we could decide to change to quantizer instead of
2301                 // clipping
2302                 // JS: I don't think that would be a good idea it could lower
2303                 //     quality instead of improve it. Just INTRADC clipping
2304                 //     deserves changes in quantizer
2305                 if (overflow)
2306                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2307             } else
2308                 s->block_last_index[i] = -1;
2309         }
2310         if (s->quantizer_noise_shaping) {
2311             for (i = 0; i < mb_block_count; i++) {
2312                 if (!skip_dct[i]) {
2313                     s->block_last_index[i] =
2314                         dct_quantize_refine(s, s->block[i], weight[i],
2315                                             orig[i], i, s->qscale);
2316                 }
2317             }
2318         }
2319
2320         if (s->luma_elim_threshold && !s->mb_intra)
2321             for (i = 0; i < 4; i++)
2322                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2323         if (s->chroma_elim_threshold && !s->mb_intra)
2324             for (i = 4; i < mb_block_count; i++)
2325                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2326
2327         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2328             for (i = 0; i < mb_block_count; i++) {
2329                 if (s->block_last_index[i] == -1)
2330                     s->coded_score[i] = INT_MAX / 256;
2331             }
2332         }
2333     }
2334
2335     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2336         s->block_last_index[4] =
2337         s->block_last_index[5] = 0;
2338         s->block[4][0] =
2339         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2340         if (!s->chroma_y_shift) { /* 422 / 444 */
2341             for (i=6; i<12; i++) {
2342                 s->block_last_index[i] = 0;
2343                 s->block[i][0] = s->block[4][0];
2344             }
2345         }
2346     }
2347
2348     // non c quantize code returns incorrect block_last_index FIXME
2349     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2350         for (i = 0; i < mb_block_count; i++) {
2351             int j;
2352             if (s->block_last_index[i] > 0) {
2353                 for (j = 63; j > 0; j--) {
2354                     if (s->block[i][s->intra_scantable.permutated[j]])
2355                         break;
2356                 }
2357                 s->block_last_index[i] = j;
2358             }
2359         }
2360     }
2361
2362     /* huffman encode */
2363     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2364     case AV_CODEC_ID_MPEG1VIDEO:
2365     case AV_CODEC_ID_MPEG2VIDEO:
2366         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2367             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2368         break;
2369     case AV_CODEC_ID_MPEG4:
2370         if (CONFIG_MPEG4_ENCODER)
2371             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2372         break;
2373     case AV_CODEC_ID_MSMPEG4V2:
2374     case AV_CODEC_ID_MSMPEG4V3:
2375     case AV_CODEC_ID_WMV1:
2376         if (CONFIG_MSMPEG4_ENCODER)
2377             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2378         break;
2379     case AV_CODEC_ID_WMV2:
2380         if (CONFIG_WMV2_ENCODER)
2381             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2382         break;
2383     case AV_CODEC_ID_H261:
2384         if (CONFIG_H261_ENCODER)
2385             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2386         break;
2387     case AV_CODEC_ID_H263:
2388     case AV_CODEC_ID_H263P:
2389     case AV_CODEC_ID_FLV1:
2390     case AV_CODEC_ID_RV10:
2391     case AV_CODEC_ID_RV20:
2392         if (CONFIG_H263_ENCODER)
2393             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2394         break;
2395     case AV_CODEC_ID_MJPEG:
2396     case AV_CODEC_ID_AMV:
2397         if (CONFIG_MJPEG_ENCODER)
2398             ff_mjpeg_encode_mb(s, s->block);
2399         break;
2400     default:
2401         av_assert1(0);
2402     }
2403 }
2404
2405 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2406 {
2407     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2408     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2409     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2410 }
2411
2412 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2413     int i;
2414
2415     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2416
2417     /* mpeg1 */
2418     d->mb_skip_run= s->mb_skip_run;
2419     for(i=0; i<3; i++)
2420         d->last_dc[i] = s->last_dc[i];
2421
2422     /* statistics */
2423     d->mv_bits= s->mv_bits;
2424     d->i_tex_bits= s->i_tex_bits;
2425     d->p_tex_bits= s->p_tex_bits;
2426     d->i_count= s->i_count;
2427     d->f_count= s->f_count;
2428     d->b_count= s->b_count;
2429     d->skip_count= s->skip_count;
2430     d->misc_bits= s->misc_bits;
2431     d->last_bits= 0;
2432
2433     d->mb_skipped= 0;
2434     d->qscale= s->qscale;
2435     d->dquant= s->dquant;
2436
2437     d->esc3_level_length= s->esc3_level_length;
2438 }
2439
2440 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2441     int i;
2442
2443     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2444     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2445
2446     /* mpeg1 */
2447     d->mb_skip_run= s->mb_skip_run;
2448     for(i=0; i<3; i++)
2449         d->last_dc[i] = s->last_dc[i];
2450
2451     /* statistics */
2452     d->mv_bits= s->mv_bits;
2453     d->i_tex_bits= s->i_tex_bits;
2454     d->p_tex_bits= s->p_tex_bits;
2455     d->i_count= s->i_count;
2456     d->f_count= s->f_count;
2457     d->b_count= s->b_count;
2458     d->skip_count= s->skip_count;
2459     d->misc_bits= s->misc_bits;
2460
2461     d->mb_intra= s->mb_intra;
2462     d->mb_skipped= s->mb_skipped;
2463     d->mv_type= s->mv_type;
2464     d->mv_dir= s->mv_dir;
2465     d->pb= s->pb;
2466     if(s->data_partitioning){
2467         d->pb2= s->pb2;
2468         d->tex_pb= s->tex_pb;
2469     }
2470     d->block= s->block;
2471     for(i=0; i<8; i++)
2472         d->block_last_index[i]= s->block_last_index[i];
2473     d->interlaced_dct= s->interlaced_dct;
2474     d->qscale= s->qscale;
2475
2476     d->esc3_level_length= s->esc3_level_length;
2477 }
2478
2479 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2480                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2481                            int *dmin, int *next_block, int motion_x, int motion_y)
2482 {
2483     int score;
2484     uint8_t *dest_backup[3];
2485
2486     copy_context_before_encode(s, backup, type);
2487
2488     s->block= s->blocks[*next_block];
2489     s->pb= pb[*next_block];
2490     if(s->data_partitioning){
2491         s->pb2   = pb2   [*next_block];
2492         s->tex_pb= tex_pb[*next_block];
2493     }
2494
2495     if(*next_block){
2496         memcpy(dest_backup, s->dest, sizeof(s->dest));
2497         s->dest[0] = s->rd_scratchpad;
2498         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2499         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2500         av_assert0(s->linesize >= 32); //FIXME
2501     }
2502
2503     encode_mb(s, motion_x, motion_y);
2504
2505     score= put_bits_count(&s->pb);
2506     if(s->data_partitioning){
2507         score+= put_bits_count(&s->pb2);
2508         score+= put_bits_count(&s->tex_pb);
2509     }
2510
2511     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2512         ff_mpv_decode_mb(s, s->block);
2513
2514         score *= s->lambda2;
2515         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2516     }
2517
2518     if(*next_block){
2519         memcpy(s->dest, dest_backup, sizeof(s->dest));
2520     }
2521
2522     if(score<*dmin){
2523         *dmin= score;
2524         *next_block^=1;
2525
2526         copy_context_after_encode(best, s, type);
2527     }
2528 }
2529
2530 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2531     uint32_t *sq = ff_square_tab + 256;
2532     int acc=0;
2533     int x,y;
2534
2535     if(w==16 && h==16)
2536         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2537     else if(w==8 && h==8)
2538         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2539
2540     for(y=0; y<h; y++){
2541         for(x=0; x<w; x++){
2542             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2543         }
2544     }
2545
2546     av_assert2(acc>=0);
2547
2548     return acc;
2549 }
2550
2551 static int sse_mb(MpegEncContext *s){
2552     int w= 16;
2553     int h= 16;
2554
2555     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2556     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2557
2558     if(w==16 && h==16)
2559       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2560         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2561                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2562                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2563       }else{
2564         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2565                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2566                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2567       }
2568     else
2569         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2570                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2571                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2572 }
2573
2574 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2575     MpegEncContext *s= *(void**)arg;
2576
2577
2578     s->me.pre_pass=1;
2579     s->me.dia_size= s->avctx->pre_dia_size;
2580     s->first_slice_line=1;
2581     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2582         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2583             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2584         }
2585         s->first_slice_line=0;
2586     }
2587
2588     s->me.pre_pass=0;
2589
2590     return 0;
2591 }
2592
2593 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2594     MpegEncContext *s= *(void**)arg;
2595
2596     ff_check_alignment();
2597
2598     s->me.dia_size= s->avctx->dia_size;
2599     s->first_slice_line=1;
2600     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2601         s->mb_x=0; //for block init below
2602         ff_init_block_index(s);
2603         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2604             s->block_index[0]+=2;
2605             s->block_index[1]+=2;
2606             s->block_index[2]+=2;
2607             s->block_index[3]+=2;
2608
2609             /* compute motion vector & mb_type and store in context */
2610             if(s->pict_type==AV_PICTURE_TYPE_B)
2611                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2612             else
2613                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2614         }
2615         s->first_slice_line=0;
2616     }
2617     return 0;
2618 }
2619
2620 static int mb_var_thread(AVCodecContext *c, void *arg){
2621     MpegEncContext *s= *(void**)arg;
2622     int mb_x, mb_y;
2623
2624     ff_check_alignment();
2625
2626     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2627         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2628             int xx = mb_x * 16;
2629             int yy = mb_y * 16;
2630             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2631             int varc;
2632             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2633
2634             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2635                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2636
2637             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2638             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2639             s->me.mb_var_sum_temp    += varc;
2640         }
2641     }
2642     return 0;
2643 }
2644
2645 static void write_slice_end(MpegEncContext *s){
2646     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2647         if(s->partitioned_frame){
2648             ff_mpeg4_merge_partitions(s);
2649         }
2650
2651         ff_mpeg4_stuffing(&s->pb);
2652     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2653         ff_mjpeg_encode_stuffing(s);
2654     }
2655
2656     avpriv_align_put_bits(&s->pb);
2657     flush_put_bits(&s->pb);
2658
2659     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2660         s->misc_bits+= get_bits_diff(s);
2661 }
2662
2663 static void write_mb_info(MpegEncContext *s)
2664 {
2665     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2666     int offset = put_bits_count(&s->pb);
2667     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2668     int gobn = s->mb_y / s->gob_index;
2669     int pred_x, pred_y;
2670     if (CONFIG_H263_ENCODER)
2671         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2672     bytestream_put_le32(&ptr, offset);
2673     bytestream_put_byte(&ptr, s->qscale);
2674     bytestream_put_byte(&ptr, gobn);
2675     bytestream_put_le16(&ptr, mba);
2676     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2677     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2678     /* 4MV not implemented */
2679     bytestream_put_byte(&ptr, 0); /* hmv2 */
2680     bytestream_put_byte(&ptr, 0); /* vmv2 */
2681 }
2682
2683 static void update_mb_info(MpegEncContext *s, int startcode)
2684 {
2685     if (!s->mb_info)
2686         return;
2687     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2688         s->mb_info_size += 12;
2689         s->prev_mb_info = s->last_mb_info;
2690     }
2691     if (startcode) {
2692         s->prev_mb_info = put_bits_count(&s->pb)/8;
2693         /* This might have incremented mb_info_size above, and we return without
2694          * actually writing any info into that slot yet. But in that case,
2695          * this will be called again at the start of the after writing the
2696          * start code, actually writing the mb info. */
2697         return;
2698     }
2699
2700     s->last_mb_info = put_bits_count(&s->pb)/8;
2701     if (!s->mb_info_size)
2702         s->mb_info_size += 12;
2703     write_mb_info(s);
2704 }
2705
2706 static int encode_thread(AVCodecContext *c, void *arg){
2707     MpegEncContext *s= *(void**)arg;
2708     int mb_x, mb_y, pdif = 0;
2709     int chr_h= 16>>s->chroma_y_shift;
2710     int i, j;
2711     MpegEncContext best_s, backup_s;
2712     uint8_t bit_buf[2][MAX_MB_BYTES];
2713     uint8_t bit_buf2[2][MAX_MB_BYTES];
2714     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2715     PutBitContext pb[2], pb2[2], tex_pb[2];
2716
2717     ff_check_alignment();
2718
2719     for(i=0; i<2; i++){
2720         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2721         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2722         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2723     }
2724
2725     s->last_bits= put_bits_count(&s->pb);
2726     s->mv_bits=0;
2727     s->misc_bits=0;
2728     s->i_tex_bits=0;
2729     s->p_tex_bits=0;
2730     s->i_count=0;
2731     s->f_count=0;
2732     s->b_count=0;
2733     s->skip_count=0;
2734
2735     for(i=0; i<3; i++){
2736         /* init last dc values */
2737         /* note: quant matrix value (8) is implied here */
2738         s->last_dc[i] = 128 << s->intra_dc_precision;
2739
2740         s->current_picture.error[i] = 0;
2741     }
2742     if(s->codec_id==AV_CODEC_ID_AMV){
2743         s->last_dc[0] = 128*8/13;
2744         s->last_dc[1] = 128*8/14;
2745         s->last_dc[2] = 128*8/14;
2746     }
2747     s->mb_skip_run = 0;
2748     memset(s->last_mv, 0, sizeof(s->last_mv));
2749
2750     s->last_mv_dir = 0;
2751
2752     switch(s->codec_id){
2753     case AV_CODEC_ID_H263:
2754     case AV_CODEC_ID_H263P:
2755     case AV_CODEC_ID_FLV1:
2756         if (CONFIG_H263_ENCODER)
2757             s->gob_index = ff_h263_get_gob_height(s);
2758         break;
2759     case AV_CODEC_ID_MPEG4:
2760         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2761             ff_mpeg4_init_partitions(s);
2762         break;
2763     }
2764
2765     s->resync_mb_x=0;
2766     s->resync_mb_y=0;
2767     s->first_slice_line = 1;
2768     s->ptr_lastgob = s->pb.buf;
2769     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2770         s->mb_x=0;
2771         s->mb_y= mb_y;
2772
2773         ff_set_qscale(s, s->qscale);
2774         ff_init_block_index(s);
2775
2776         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2777             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2778             int mb_type= s->mb_type[xy];
2779 //            int d;
2780             int dmin= INT_MAX;
2781             int dir;
2782
2783             if (   s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES
2784                 && s->slice_context_count == 1
2785                 && s->pb.buf == s->avctx->internal->byte_buffer) {
2786                 int new_size =  s->avctx->internal->byte_buffer_size
2787                               + s->avctx->internal->byte_buffer_size/4
2788                               + s->mb_width*MAX_MB_BYTES;
2789                 int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2790                 int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2791
2792                 uint8_t *new_buffer = NULL;
2793                 int new_buffer_size = 0;
2794
2795                 av_fast_padded_malloc(&new_buffer, &new_buffer_size, new_size);
2796                 if (new_buffer) {
2797                     memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2798                     av_free(s->avctx->internal->byte_buffer);
2799                     s->avctx->internal->byte_buffer      = new_buffer;
2800                     s->avctx->internal->byte_buffer_size = new_buffer_size;
2801                     rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2802                     s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2803                     s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2804                 }
2805             }
2806             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2807                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2808                 return -1;
2809             }
2810             if(s->data_partitioning){
2811                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2812                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2813                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2814                     return -1;
2815                 }
2816             }
2817
2818             s->mb_x = mb_x;
2819             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2820             ff_update_block_index(s);
2821
2822             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2823                 ff_h261_reorder_mb_index(s);
2824                 xy= s->mb_y*s->mb_stride + s->mb_x;
2825                 mb_type= s->mb_type[xy];
2826             }
2827
2828             /* write gob / video packet header  */
2829             if(s->rtp_mode){
2830                 int current_packet_size, is_gob_start;
2831
2832                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2833
2834                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2835
2836                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2837
2838                 switch(s->codec_id){
2839                 case AV_CODEC_ID_H261:
2840                     is_gob_start=0;//FIXME
2841                     break;
2842                 case AV_CODEC_ID_H263:
2843                 case AV_CODEC_ID_H263P:
2844                     if(!s->h263_slice_structured)
2845                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2846                     break;
2847                 case AV_CODEC_ID_MPEG2VIDEO:
2848                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2849                 case AV_CODEC_ID_MPEG1VIDEO:
2850                     if(s->mb_skip_run) is_gob_start=0;
2851                     break;
2852                 case AV_CODEC_ID_MJPEG:
2853                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2854                     break;
2855                 }
2856
2857                 if(is_gob_start){
2858                     if(s->start_mb_y != mb_y || mb_x!=0){
2859                         write_slice_end(s);
2860
2861                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2862                             ff_mpeg4_init_partitions(s);
2863                         }
2864                     }
2865
2866                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2867                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2868
2869                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2870                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2871                         int d = 100 / s->error_rate;
2872                         if(r % d == 0){
2873                             current_packet_size=0;
2874                             s->pb.buf_ptr= s->ptr_lastgob;
2875                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2876                         }
2877                     }
2878
2879                     if (s->avctx->rtp_callback){
2880                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2881                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2882                     }
2883                     update_mb_info(s, 1);
2884
2885                     switch(s->codec_id){
2886                     case AV_CODEC_ID_MPEG4:
2887                         if (CONFIG_MPEG4_ENCODER) {
2888                             ff_mpeg4_encode_video_packet_header(s);
2889                             ff_mpeg4_clean_buffers(s);
2890                         }
2891                     break;
2892                     case AV_CODEC_ID_MPEG1VIDEO:
2893                     case AV_CODEC_ID_MPEG2VIDEO:
2894                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2895                             ff_mpeg1_encode_slice_header(s);
2896                             ff_mpeg1_clean_buffers(s);
2897                         }
2898                     break;
2899                     case AV_CODEC_ID_H263:
2900                     case AV_CODEC_ID_H263P:
2901                         if (CONFIG_H263_ENCODER)
2902                             ff_h263_encode_gob_header(s, mb_y);
2903                     break;
2904                     }
2905
2906                     if(s->flags&CODEC_FLAG_PASS1){
2907                         int bits= put_bits_count(&s->pb);
2908                         s->misc_bits+= bits - s->last_bits;
2909                         s->last_bits= bits;
2910                     }
2911
2912                     s->ptr_lastgob += current_packet_size;
2913                     s->first_slice_line=1;
2914                     s->resync_mb_x=mb_x;
2915                     s->resync_mb_y=mb_y;
2916                 }
2917             }
2918
2919             if(  (s->resync_mb_x   == s->mb_x)
2920                && s->resync_mb_y+1 == s->mb_y){
2921                 s->first_slice_line=0;
2922             }
2923
2924             s->mb_skipped=0;
2925             s->dquant=0; //only for QP_RD
2926
2927             update_mb_info(s, 0);
2928
2929             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2930                 int next_block=0;
2931                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2932
2933                 copy_context_before_encode(&backup_s, s, -1);
2934                 backup_s.pb= s->pb;
2935                 best_s.data_partitioning= s->data_partitioning;
2936                 best_s.partitioned_frame= s->partitioned_frame;
2937                 if(s->data_partitioning){
2938                     backup_s.pb2= s->pb2;
2939                     backup_s.tex_pb= s->tex_pb;
2940                 }
2941
2942                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2943                     s->mv_dir = MV_DIR_FORWARD;
2944                     s->mv_type = MV_TYPE_16X16;
2945                     s->mb_intra= 0;
2946                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2947                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2948                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2949                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2950                 }
2951                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2952                     s->mv_dir = MV_DIR_FORWARD;
2953                     s->mv_type = MV_TYPE_FIELD;
2954                     s->mb_intra= 0;
2955                     for(i=0; i<2; i++){
2956                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2957                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2958                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2959                     }
2960                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2961                                  &dmin, &next_block, 0, 0);
2962                 }
2963                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2964                     s->mv_dir = MV_DIR_FORWARD;
2965                     s->mv_type = MV_TYPE_16X16;
2966                     s->mb_intra= 0;
2967                     s->mv[0][0][0] = 0;
2968                     s->mv[0][0][1] = 0;
2969                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2970                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2971                 }
2972                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2973                     s->mv_dir = MV_DIR_FORWARD;
2974                     s->mv_type = MV_TYPE_8X8;
2975                     s->mb_intra= 0;
2976                     for(i=0; i<4; i++){
2977                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2978                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2979                     }
2980                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2981                                  &dmin, &next_block, 0, 0);
2982                 }
2983                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2984                     s->mv_dir = MV_DIR_FORWARD;
2985                     s->mv_type = MV_TYPE_16X16;
2986                     s->mb_intra= 0;
2987                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2988                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2989                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2990                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2991                 }
2992                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2993                     s->mv_dir = MV_DIR_BACKWARD;
2994                     s->mv_type = MV_TYPE_16X16;
2995                     s->mb_intra= 0;
2996                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2997                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2998                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2999                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3000                 }
3001                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3002                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3003                     s->mv_type = MV_TYPE_16X16;
3004                     s->mb_intra= 0;
3005                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3006                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3007                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3008                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3009                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3010                                  &dmin, &next_block, 0, 0);
3011                 }
3012                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3013                     s->mv_dir = MV_DIR_FORWARD;
3014                     s->mv_type = MV_TYPE_FIELD;
3015                     s->mb_intra= 0;
3016                     for(i=0; i<2; i++){
3017                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3018                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3019                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3020                     }
3021                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3022                                  &dmin, &next_block, 0, 0);
3023                 }
3024                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3025                     s->mv_dir = MV_DIR_BACKWARD;
3026                     s->mv_type = MV_TYPE_FIELD;
3027                     s->mb_intra= 0;
3028                     for(i=0; i<2; i++){
3029                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3030                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3031                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3032                     }
3033                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3034                                  &dmin, &next_block, 0, 0);
3035                 }
3036                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3037                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3038                     s->mv_type = MV_TYPE_FIELD;
3039                     s->mb_intra= 0;
3040                     for(dir=0; dir<2; dir++){
3041                         for(i=0; i<2; i++){
3042                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3043                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3044                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3045                         }
3046                     }
3047                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3048                                  &dmin, &next_block, 0, 0);
3049                 }
3050                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3051                     s->mv_dir = 0;
3052                     s->mv_type = MV_TYPE_16X16;
3053                     s->mb_intra= 1;
3054                     s->mv[0][0][0] = 0;
3055                     s->mv[0][0][1] = 0;
3056                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3057                                  &dmin, &next_block, 0, 0);
3058                     if(s->h263_pred || s->h263_aic){
3059                         if(best_s.mb_intra)
3060                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3061                         else
3062                             ff_clean_intra_table_entries(s); //old mode?
3063                     }
3064                 }
3065
3066                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3067                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3068                         const int last_qp= backup_s.qscale;
3069                         int qpi, qp, dc[6];
3070                         int16_t ac[6][16];
3071                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3072                         static const int dquant_tab[4]={-1,1,-2,2};
3073                         int storecoefs = s->mb_intra && s->dc_val[0];
3074
3075                         av_assert2(backup_s.dquant == 0);
3076
3077                         //FIXME intra
3078                         s->mv_dir= best_s.mv_dir;
3079                         s->mv_type = MV_TYPE_16X16;
3080                         s->mb_intra= best_s.mb_intra;
3081                         s->mv[0][0][0] = best_s.mv[0][0][0];
3082                         s->mv[0][0][1] = best_s.mv[0][0][1];
3083                         s->mv[1][0][0] = best_s.mv[1][0][0];
3084                         s->mv[1][0][1] = best_s.mv[1][0][1];
3085
3086                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3087                         for(; qpi<4; qpi++){
3088                             int dquant= dquant_tab[qpi];
3089                             qp= last_qp + dquant;
3090                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3091                                 continue;
3092                             backup_s.dquant= dquant;
3093                             if(storecoefs){
3094                                 for(i=0; i<6; i++){
3095                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3096                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3097                                 }
3098                             }
3099
3100                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3101                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3102                             if(best_s.qscale != qp){
3103                                 if(storecoefs){
3104                                     for(i=0; i<6; i++){
3105                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3106                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3107                                     }
3108                                 }
3109                             }
3110                         }
3111                     }
3112                 }
3113                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3114                     int mx= s->b_direct_mv_table[xy][0];
3115                     int my= s->b_direct_mv_table[xy][1];
3116
3117                     backup_s.dquant = 0;
3118                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3119                     s->mb_intra= 0;
3120                     ff_mpeg4_set_direct_mv(s, mx, my);
3121                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3122                                  &dmin, &next_block, mx, my);
3123                 }
3124                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3125                     backup_s.dquant = 0;
3126                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3127                     s->mb_intra= 0;
3128                     ff_mpeg4_set_direct_mv(s, 0, 0);
3129                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3130                                  &dmin, &next_block, 0, 0);
3131                 }
3132                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3133                     int coded=0;
3134                     for(i=0; i<6; i++)
3135                         coded |= s->block_last_index[i];
3136                     if(coded){
3137                         int mx,my;
3138                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3139                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3140                             mx=my=0; //FIXME find the one we actually used
3141                             ff_mpeg4_set_direct_mv(s, mx, my);
3142                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3143                             mx= s->mv[1][0][0];
3144                             my= s->mv[1][0][1];
3145                         }else{
3146                             mx= s->mv[0][0][0];
3147                             my= s->mv[0][0][1];
3148                         }
3149
3150                         s->mv_dir= best_s.mv_dir;
3151                         s->mv_type = best_s.mv_type;
3152                         s->mb_intra= 0;
3153 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3154                         s->mv[0][0][1] = best_s.mv[0][0][1];
3155                         s->mv[1][0][0] = best_s.mv[1][0][0];
3156                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3157                         backup_s.dquant= 0;
3158                         s->skipdct=1;
3159                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3160                                         &dmin, &next_block, mx, my);
3161                         s->skipdct=0;
3162                     }
3163                 }
3164
3165                 s->current_picture.qscale_table[xy] = best_s.qscale;
3166
3167                 copy_context_after_encode(s, &best_s, -1);
3168
3169                 pb_bits_count= put_bits_count(&s->pb);
3170                 flush_put_bits(&s->pb);
3171                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3172                 s->pb= backup_s.pb;
3173
3174                 if(s->data_partitioning){
3175                     pb2_bits_count= put_bits_count(&s->pb2);
3176                     flush_put_bits(&s->pb2);
3177                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3178                     s->pb2= backup_s.pb2;
3179
3180                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3181                     flush_put_bits(&s->tex_pb);
3182                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3183                     s->tex_pb= backup_s.tex_pb;
3184                 }
3185                 s->last_bits= put_bits_count(&s->pb);
3186
3187                 if (CONFIG_H263_ENCODER &&
3188                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3189                     ff_h263_update_motion_val(s);
3190
3191                 if(next_block==0){ //FIXME 16 vs linesize16
3192                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3193                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3194                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3195                 }
3196
3197                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3198                     ff_mpv_decode_mb(s, s->block);
3199             } else {
3200                 int motion_x = 0, motion_y = 0;
3201                 s->mv_type=MV_TYPE_16X16;
3202                 // only one MB-Type possible
3203
3204                 switch(mb_type){
3205                 case CANDIDATE_MB_TYPE_INTRA:
3206                     s->mv_dir = 0;
3207                     s->mb_intra= 1;
3208                     motion_x= s->mv[0][0][0] = 0;
3209                     motion_y= s->mv[0][0][1] = 0;
3210                     break;
3211                 case CANDIDATE_MB_TYPE_INTER:
3212                     s->mv_dir = MV_DIR_FORWARD;
3213                     s->mb_intra= 0;
3214                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3215                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3216                     break;
3217                 case CANDIDATE_MB_TYPE_INTER_I:
3218                     s->mv_dir = MV_DIR_FORWARD;
3219                     s->mv_type = MV_TYPE_FIELD;
3220                     s->mb_intra= 0;
3221                     for(i=0; i<2; i++){
3222                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3223                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3224                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3225                     }
3226                     break;
3227                 case CANDIDATE_MB_TYPE_INTER4V:
3228                     s->mv_dir = MV_DIR_FORWARD;
3229                     s->mv_type = MV_TYPE_8X8;
3230                     s->mb_intra= 0;
3231                     for(i=0; i<4; i++){
3232                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3233                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3234                     }
3235                     break;
3236                 case CANDIDATE_MB_TYPE_DIRECT:
3237                     if (CONFIG_MPEG4_ENCODER) {
3238                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3239                         s->mb_intra= 0;
3240                         motion_x=s->b_direct_mv_table[xy][0];
3241                         motion_y=s->b_direct_mv_table[xy][1];
3242                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3243                     }
3244                     break;
3245                 case CANDIDATE_MB_TYPE_DIRECT0:
3246                     if (CONFIG_MPEG4_ENCODER) {
3247                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3248                         s->mb_intra= 0;
3249                         ff_mpeg4_set_direct_mv(s, 0, 0);
3250                     }
3251                     break;
3252                 case CANDIDATE_MB_TYPE_BIDIR:
3253                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3254                     s->mb_intra= 0;
3255                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3256                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3257                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3258                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3259                     break;
3260                 case CANDIDATE_MB_TYPE_BACKWARD:
3261                     s->mv_dir = MV_DIR_BACKWARD;
3262                     s->mb_intra= 0;
3263                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3264                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3265                     break;
3266                 case CANDIDATE_MB_TYPE_FORWARD:
3267                     s->mv_dir = MV_DIR_FORWARD;
3268                     s->mb_intra= 0;
3269                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3270                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3271                     break;
3272                 case CANDIDATE_MB_TYPE_FORWARD_I:
3273                     s->mv_dir = MV_DIR_FORWARD;
3274                     s->mv_type = MV_TYPE_FIELD;
3275                     s->mb_intra= 0;
3276                     for(i=0; i<2; i++){
3277                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3278                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3279                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3280                     }
3281                     break;
3282                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3283                     s->mv_dir = MV_DIR_BACKWARD;
3284                     s->mv_type = MV_TYPE_FIELD;
3285                     s->mb_intra= 0;
3286                     for(i=0; i<2; i++){
3287                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3288                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3289                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3290                     }
3291                     break;
3292                 case CANDIDATE_MB_TYPE_BIDIR_I:
3293                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3294                     s->mv_type = MV_TYPE_FIELD;
3295                     s->mb_intra= 0;
3296                     for(dir=0; dir<2; dir++){
3297                         for(i=0; i<2; i++){
3298                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3299                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3300                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3301                         }
3302                     }
3303                     break;
3304                 default:
3305                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3306                 }
3307
3308                 encode_mb(s, motion_x, motion_y);
3309
3310                 // RAL: Update last macroblock type
3311                 s->last_mv_dir = s->mv_dir;
3312
3313                 if (CONFIG_H263_ENCODER &&
3314                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3315                     ff_h263_update_motion_val(s);
3316
3317                 ff_mpv_decode_mb(s, s->block);
3318             }
3319
3320             /* clean the MV table in IPS frames for direct mode in B frames */
3321             if(s->mb_intra /* && I,P,S_TYPE */){
3322                 s->p_mv_table[xy][0]=0;
3323                 s->p_mv_table[xy][1]=0;
3324             }
3325
3326             if(s->flags&CODEC_FLAG_PSNR){
3327                 int w= 16;
3328                 int h= 16;
3329
3330                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3331                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3332
3333                 s->current_picture.error[0] += sse(
3334                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3335                     s->dest[0], w, h, s->linesize);
3336                 s->current_picture.error[1] += sse(
3337                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3338                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3339                 s->current_picture.error[2] += sse(
3340                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3341                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3342             }
3343             if(s->loop_filter){
3344                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3345                     ff_h263_loop_filter(s);
3346             }
3347             av_dlog(s->avctx, "MB %d %d bits\n",
3348                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3349         }
3350     }
3351
3352     //not beautiful here but we must write it before flushing so it has to be here
3353     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3354         ff_msmpeg4_encode_ext_header(s);
3355
3356     write_slice_end(s);
3357
3358     /* Send the last GOB if RTP */
3359     if (s->avctx->rtp_callback) {
3360         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3361         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3362         /* Call the RTP callback to send the last GOB */
3363         emms_c();
3364         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3365     }
3366
3367     return 0;
3368 }
3369
3370 #define MERGE(field) dst->field += src->field; src->field=0
3371 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3372     MERGE(me.scene_change_score);
3373     MERGE(me.mc_mb_var_sum_temp);
3374     MERGE(me.mb_var_sum_temp);
3375 }
3376
3377 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3378     int i;
3379
3380     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3381     MERGE(dct_count[1]);
3382     MERGE(mv_bits);
3383     MERGE(i_tex_bits);
3384     MERGE(p_tex_bits);
3385     MERGE(i_count);
3386     MERGE(f_count);
3387     MERGE(b_count);
3388     MERGE(skip_count);
3389     MERGE(misc_bits);
3390     MERGE(er.error_count);
3391     MERGE(padding_bug_score);
3392     MERGE(current_picture.error[0]);
3393     MERGE(current_picture.error[1]);
3394     MERGE(current_picture.error[2]);
3395
3396     if(dst->avctx->noise_reduction){
3397         for(i=0; i<64; i++){
3398             MERGE(dct_error_sum[0][i]);
3399             MERGE(dct_error_sum[1][i]);
3400         }
3401     }
3402
3403     assert(put_bits_count(&src->pb) % 8 ==0);
3404     assert(put_bits_count(&dst->pb) % 8 ==0);
3405     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3406     flush_put_bits(&dst->pb);
3407 }
3408
3409 static int estimate_qp(MpegEncContext *s, int dry_run){
3410     if (s->next_lambda){
3411         s->current_picture_ptr->f->quality =
3412         s->current_picture.f->quality = s->next_lambda;
3413         if(!dry_run) s->next_lambda= 0;
3414     } else if (!s->fixed_qscale) {
3415         s->current_picture_ptr->f->quality =
3416         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3417         if (s->current_picture.f->quality < 0)
3418             return -1;
3419     }
3420
3421     if(s->adaptive_quant){
3422         switch(s->codec_id){
3423         case AV_CODEC_ID_MPEG4:
3424             if (CONFIG_MPEG4_ENCODER)
3425                 ff_clean_mpeg4_qscales(s);
3426             break;
3427         case AV_CODEC_ID_H263:
3428         case AV_CODEC_ID_H263P:
3429         case AV_CODEC_ID_FLV1:
3430             if (CONFIG_H263_ENCODER)
3431                 ff_clean_h263_qscales(s);
3432             break;
3433         default:
3434             ff_init_qscale_tab(s);
3435         }
3436
3437         s->lambda= s->lambda_table[0];
3438         //FIXME broken
3439     }else
3440         s->lambda = s->current_picture.f->quality;
3441     update_qscale(s);
3442     return 0;
3443 }
3444
3445 /* must be called before writing the header */
3446 static void set_frame_distances(MpegEncContext * s){
3447     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3448     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3449
3450     if(s->pict_type==AV_PICTURE_TYPE_B){
3451         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3452         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3453     }else{
3454         s->pp_time= s->time - s->last_non_b_time;
3455         s->last_non_b_time= s->time;
3456         assert(s->picture_number==0 || s->pp_time > 0);
3457     }
3458 }
3459
/**
 * Encode one picture.
 *
 * Sets up timing and rounding state, runs motion estimation (or marks all
 * macroblocks intra for I-frames), selects f_code/b_code, estimates the
 * quantizer, writes the picture header for the output format and finally
 * runs encode_thread over all slice contexts.
 *
 * @param s              encoder context; s->slice_context_count contexts are used
 * @param picture_number stored in s->picture_number and passed to the header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails, or the
 *         negative value returned by ff_update_duplicate_context()
 */
3460 static int encode_picture(MpegEncContext *s, int picture_number)
3461 {
3462     int i, ret;
3463     int bits;
3464     int context_count = s->slice_context_count;
3465
3466     s->picture_number = picture_number;
3467
3468     /* Reset the average MB variance */
3469     s->me.mb_var_sum_temp    =
3470     s->me.mc_mb_var_sum_temp = 0;
3471
3472     /* we need to initialize some time vars before we can encode b-frames */
3473     // RAL: Condition added for MPEG1VIDEO
3474     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3475         set_frame_distances(s);
3476     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3477         ff_set_mpeg4_time(s);
3478
3479     s->me.scene_change_score=0;
3480
3481 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3482
         /* I-frames reset no_rounding (1 for msmpeg4_version >= 3, else 0);
          * other non-B frames toggle it when flipflop_rounding is set or the
          * codec is H263P or MPEG4. B-frames leave it unchanged. */
3483     if(s->pict_type==AV_PICTURE_TYPE_I){
3484         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3485         else                        s->no_rounding=0;
3486     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3487         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3488             s->no_rounding ^= 1;
3489     }
3490
         /* Second pass: dry-run QP estimate followed by ff_get_2pass_fcode().
          * Otherwise, unless CODEC_FLAG_QSCALE is set, seed lambda from the
          * last lambda stored for this picture type (B) or for the last
          * non-B picture type. */
3491     if(s->flags & CODEC_FLAG_PASS2){
3492         if (estimate_qp(s,1) < 0)
3493             return -1;
3494         ff_get_2pass_fcode(s);
3495     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3496         if(s->pict_type==AV_PICTURE_TYPE_B)
3497             s->lambda= s->last_lambda_for[s->pict_type];
3498         else
3499             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3500         update_qscale(s);
3501     }
3502
         /* For every codec except AMV and MJPEG the chroma intra quantizer
          * matrices alias the luma ones; chroma matrices that currently
          * differ from the luma ones are freed first. */
3503     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3504         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3505         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3506         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3507         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3508     }
3509
3510     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* Call ff_update_duplicate_context() for every additional slice
          * context (index 0 is skipped); abort on the first failure. */
3511     for(i=1; i<context_count; i++){
3512         ret = ff_update_duplicate_context(s->thread_context[i], s);
3513         if (ret < 0)
3514             return ret;
3515     }
3516
3517     if(ff_init_me(s)<0)
3518         return -1;
3519
3520     /* Estimate motion for every MB */
3521     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3522         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3523         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3524         if (s->pict_type != AV_PICTURE_TYPE_B) {
                 /* pre-pass runs when pre_me is set and the last non-B picture
                  * was an I-frame, or unconditionally when pre_me == 2 */
3525             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3526                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3527             }
3528         }
3529
3530         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3531     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3532         /* I-Frame */
3533         for(i=0; i<s->mb_stride*s->mb_height; i++)
3534             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3535
3536         if(!s->fixed_qscale){
3537             /* finding spatial complexity for I-frame rate control */
3538             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3539         }
3540     }
         /* accumulate the ME statistics of the other contexts into s */
3541     for(i=1; i<context_count; i++){
3542         merge_context_after_me(s, s->thread_context[i]);
3543     }
3544     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3545     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3546     emms_c();
3547
         /* promote a P-frame to an I-frame when the scene change score
          * exceeds the configured threshold */
3548     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3549         s->pict_type= AV_PICTURE_TYPE_I;
3550         for(i=0; i<s->mb_stride*s->mb_height; i++)
3551             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3552         if(s->msmpeg4_version >= 3)
3553             s->no_rounding=1;
3554         av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3555                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3556     }
3557
         /* Without umvplus: pick f_code (and b_code for B-frames) from the
          * motion vector tables, then pass each table with the chosen code
          * to ff_fix_long_mvs(). */
3558     if(!s->umvplus){
3559         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3560             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3561
3562             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3563                 int a,b;
3564                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3565                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3566                 s->f_code= FFMAX3(s->f_code, a, b);
3567             }
3568
3569             ff_fix_long_p_mvs(s);
3570             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3571             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3572                 int j;
3573                 for(i=0; i<2; i++){
3574                     for(j=0; j<2; j++)
3575                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3576                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3577                 }
3578             }
3579         }
3580
3581         if(s->pict_type==AV_PICTURE_TYPE_B){
3582             int a, b;
3583
                 /* f_code covers the forward tables, b_code the backward ones */
3584             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3585             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3586             s->f_code = FFMAX(a, b);
3587
3588             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3589             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3590             s->b_code = FFMAX(a, b);
3591
3592             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3593             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3594             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3595             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3596             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3597                 int dir, j;
3598                 for(dir=0; dir<2; dir++){
3599                     for(i=0; i<2; i++){
3600                         for(j=0; j<2; j++){
3601                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3602                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3603                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3604                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3605                         }
3606                     }
3607                 }
3608             }
3609         }
3610     }
3611
3612     if (estimate_qp(s, 0) < 0)
3613         return -1;
3614
3615     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3616         s->qscale= 3; //reduce clipping problems
3617
         /* MJPEG: start from the MPEG-1 default intra matrix, overridden by
          * avctx->intra_matrix (luma and chroma) and avctx->chroma_intra_matrix
          * (chroma only) when provided. Entries 1..63 are multiplied by
          * qscale, shifted right by 3 and clipped to 8 bits; entry 0 is taken
          * from the MPEG-2 DC scale table. qscale is then fixed to 8. */
3618     if (s->out_format == FMT_MJPEG) {
3619         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3620         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3621
3622         if (s->avctx->intra_matrix) {
3623             chroma_matrix =
3624             luma_matrix = s->avctx->intra_matrix;
3625         }
3626         if (s->avctx->chroma_intra_matrix)
3627             chroma_matrix = s->avctx->chroma_intra_matrix;
3628
3629         /* for mjpeg, we do include qscale in the matrix */
3630         for(i=1;i<64;i++){
3631             int j = s->idsp.idct_permutation[i];
3632
3633             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3634             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3635         }
3636         s->y_dc_scale_table=
3637         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3638         s->chroma_intra_matrix[0] =
3639         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3640         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3641                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3642         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3643                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3644         s->qscale= 8;
3645     }
         /* AMV: fixed matrices from sp5x_quant_table rows 10 (luma) and 11
          * (chroma), constant DC scale of 13 (luma) and 14 (chroma), and
          * qscale fixed to 8. */
3646     if(s->codec_id == AV_CODEC_ID_AMV){
3647         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3648         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3649         for(i=1;i<64;i++){
3650             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3651
3652             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3653             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3654         }
3655         s->y_dc_scale_table= y;
3656         s->c_dc_scale_table= c;
3657         s->intra_matrix[0] = 13;
3658         s->chroma_intra_matrix[0] = 14;
3659         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3660                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3661         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3662                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3663         s->qscale= 8;
3664     }
3665
3666     //FIXME var duplication
3667     s->current_picture_ptr->f->key_frame =
3668     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3669     s->current_picture_ptr->f->pict_type =
3670     s->current_picture.f->pict_type = s->pict_type;
3671
3672     if (s->current_picture.f->key_frame)
3673         s->picture_in_gop_number=0;
3674
         /* write the picture header for the output format; its size in bits
          * is recorded in header_bits below */
3675     s->mb_x = s->mb_y = 0;
3676     s->last_bits= put_bits_count(&s->pb);
3677     switch(s->out_format) {
3678     case FMT_MJPEG:
3679         if (CONFIG_MJPEG_ENCODER)
3680             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3681                                            s->intra_matrix, s->chroma_intra_matrix);
3682         break;
3683     case FMT_H261:
3684         if (CONFIG_H261_ENCODER)
3685             ff_h261_encode_picture_header(s, picture_number);
3686         break;
3687     case FMT_H263:
3688         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3689             ff_wmv2_encode_picture_header(s, picture_number);
3690         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3691             ff_msmpeg4_encode_picture_header(s, picture_number);
3692         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3693             ff_mpeg4_encode_picture_header(s, picture_number);
3694         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3695             ff_rv10_encode_picture_header(s, picture_number);
3696         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3697             ff_rv20_encode_picture_header(s, picture_number);
3698         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3699             ff_flv_encode_picture_header(s, picture_number);
3700         else if (CONFIG_H263_ENCODER)
3701             ff_h263_encode_picture_header(s, picture_number);
3702         break;
3703     case FMT_MPEG1:
3704         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3705             ff_mpeg1_encode_picture_header(s, picture_number);
3706         break;
3707     default:
3708         av_assert0(0);
3709     }
3710     bits= put_bits_count(&s->pb);
3711     s->header_bits= bits - s->last_bits;
3712
         /* update the other slice contexts, run encode_thread on all of them,
          * then merge their statistics and bitstreams back into s */
3713     for(i=1; i<context_count; i++){
3714         update_duplicate_context_after_me(s->thread_context[i], s);
3715     }
3716     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3717     for(i=1; i<context_count; i++){
3718         merge_context_after_encode(s, s->thread_context[i]);
3719     }
3720     emms_c();
3721     return 0;
3722 }
3723
3724 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3725     const int intra= s->mb_intra;
3726     int i;
3727
3728     s->dct_count[intra]++;
3729
3730     for(i=0; i<64; i++){
3731         int level= block[i];
3732
3733         if(level){
3734             if(level>0){
3735                 s->dct_error_sum[intra][i] += level;
3736                 level -= s->dct_offset[intra][i];
3737                 if(level<0) level=0;
3738             }else{
3739                 s->dct_error_sum[intra][i] -= level;
3740                 level += s->dct_offset[intra][i];
3741                 if(level>0) level=0;
3742             }
3743             block[i]= level;
3744         }
3745     }
3746 }
3747
/**
 * Transform (s->fdsp.fdct) and quantize one 8x8 block, selecting the
 * quantized levels with a rate-distortion (trellis) search.
 *
 * For each scan position up to the last significant coefficient, up to two
 * candidate levels are kept (the rounded level and the one whose magnitude is
 * one smaller). A dynamic-programming pass over (run, level) pairs then picks
 * the chain minimizing distortion + lambda * VLC length, where VLC lengths
 * come from the s->*_ac_vlc_length / s->*_ac_vlc_last_length tables and
 * s->ac_esc_length is charged for levels outside the table range.
 *
 * @param s        encoder context
 * @param block    in: data handed to s->fdsp.fdct(); out: quantized levels,
 *                 written at s->intra_scantable.permutated positions
 * @param n        block index; n < 4 selects s->y_dc_scale and the luma intra
 *                 matrix, otherwise s->c_dc_scale and the chroma intra matrix;
 *                 also indexes s->coded_score[]
 * @param qscale   quantizer scale; selects the row of the quantization matrix
 * @param overflow set to nonzero when a quantized magnitude may exceed
 *                 s->max_qcoeff
 * @return scan index of the last nonzero coefficient; for inter blocks -1
 *         when no coefficient is coded
 */
static int dct_quantize_trellis_c(MpegEncContext *s,
                                  int16_t *block, int n,
                                  int qscale, int *overflow){
    const int *qmat;
    const uint8_t *scantable= s->intra_scantable.scantable;
    const uint8_t *perm_scantable= s->intra_scantable.permutated;
    int max=0;
    unsigned int threshold1, threshold2;
    int bias=0;
    int run_tab[65];      // run preceding the best level ending at scan position i-1
    int level_tab[65];    // best level ending at scan position i-1
    int score_tab[65];    // best score of a chain whose last coded position is i-1
    int survivor[65];     // chain end points still worth extending
    int survivor_count;
    int last_run=0;
    int last_level=0;
    int last_score= 0;    // 0 == cost of coding nothing (distortions are relative to zeroing)
    int last_i;
    int coeff[2][64];     // candidate levels per scan position
    int coeff_count[64];  // number of valid candidates in coeff[][i]
    int qmul, qadd, start_i, last_non_zero, i, dc;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;
    const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);

    s->fdsp.fdct(block);

    if(s->dct_error_sum)
        s->denoise_dct(s, block);
    // dequantization parameters used for the H.263/H.261 distortion estimate
    qmul= qscale*16;
    qadd= ((qscale-1)|1)*8;

    if (s->mb_intra) {
        int q;
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
            q = q << 3;
        } else{
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1 << 3;
            qadd=0;
        }

        /* note: block[0] is assumed to be positive */
        block[0] = (block[0] + (q >> 1)) / q;
        // the DC coefficient is quantized above; the search covers AC only
        start_i = 1;
        last_non_zero = 0;
        qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
            bias= 1<<(QMAT_SHIFT-1);
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
    } else {
        start_i = 0;
        last_non_zero = -1;
        qmat = s->q_inter_matrix[qscale];
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }
    last_i= start_i;

    // (unsigned)(level+threshold1) > threshold2 is a single-compare form of
    // level > threshold1 || level < -threshold1
    threshold1= (1<<QMAT_SHIFT) - bias - 1;
    threshold2= (threshold1<<1);

    // scan backwards for the last coefficient that quantizes to nonzero
    for(i=63; i>=start_i; i--) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

        if(((unsigned)(level+threshold1))>threshold2){
            last_non_zero = i;
            break;
        }
    }

    // build the candidate levels for every position up to last_non_zero
    for(i=start_i; i<=last_non_zero; i++) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
        if(((unsigned)(level+threshold1))>threshold2){
            if(level>0){
                level= (bias + level)>>QMAT_SHIFT;
                coeff[0][i]= level;
                coeff[1][i]= level-1;
//                coeff[2][k]= level-2;
            }else{
                level= (bias - level)>>QMAT_SHIFT;
                coeff[0][i]= -level;
                coeff[1][i]= -level+1;
//                coeff[2][k]= -level+2;
            }
            // a magnitude of 1 has only one nonzero candidate
            coeff_count[i]= FFMIN(level, 2);
            av_assert2(coeff_count[i]);
            max |=level;
        }else{
            // below threshold: the only nonzero candidate is +1/-1 with the sign of level
            coeff[0][i]= (level>>31)|1;
            coeff_count[i]= 1;
        }
    }

    *overflow= s->max_qcoeff < max; //overflow might have happened

    // nothing significant: clear the searched range and report the initial index
    if(last_non_zero < start_i){
        memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
        return last_non_zero;
    }

    score_tab[start_i]= 0;
    survivor[0]= start_i;
    survivor_count= 1;

    // dynamic programming over scan positions
    for(i=start_i; i<=last_non_zero; i++){
        int level_index, j, zero_distortion;
        int dct_coeff= FFABS(block[ scantable[i] ]);
        int best_score=256*256*256*120;

        // rescale when the fast AAN transform is in use
        if (s->fdsp.fdct == ff_fdct_ifast)
            dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
        zero_distortion= dct_coeff*dct_coeff;

        for(level_index=0; level_index < coeff_count[i]; level_index++){
            int distortion;
            int level= coeff[level_index][i];
            const int alevel= FFABS(level);
            int unquant_coeff;

            av_assert2(level);

            // reconstruct the magnitude this candidate would dequantize to
            if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
                unquant_coeff= alevel*qmul + qadd;
            }else{ //MPEG1
                j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
                if(s->mb_intra){
                        unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }else{
                        unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }
                unquant_coeff<<= 3;
            }

            // distortion relative to coding this coefficient as zero
            distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
            level+=64;
            if((level&(~127)) == 0){
                // level+64 fits the 0..127 range indexed by the VLC length tables
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                    score += score_tab[i-run];

                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                // for these formats also score this coefficient as the last one, using last_length[]
                if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
                    for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                        score += score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }else{
                // level outside the table range: charge the escape length instead
                distortion += esc_length*lambda;
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distortion + score_tab[i-run];

                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
                  for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distortion + score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }
        }

        score_tab[i+1]= best_score;

        // drop survivors from the top of the list that score worse than the new
        // end point (with a lambda tolerance when last_non_zero > 27)
        //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
        if(last_non_zero <= 27){
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score)
                    break;
            }
        }else{
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
                    break;
            }
        }

        survivor[ survivor_count++ ]= i+1;
    }

    // formats other than H.263/H.261: pick the end point afterwards, adding a
    // flat lambda*2 cost to every end point except position 0
    if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
        last_score= 256*256*256*120;
        for(i= survivor[0]; i<=last_non_zero + 1; i++){
            int score= score_tab[i];
            if(i) score += lambda*2; //FIXME exacter?

            if(score < last_score){
                last_score= score;
                last_i= i;
                last_level= level_tab[i];
                last_run= run_tab[i];
            }
        }
    }

    s->coded_score[n] = last_score;

    // remember |block[0]| before the coefficients are cleared
    dc= FFABS(block[0]);
    last_non_zero= last_i - 1;
    memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));

    if(last_non_zero < start_i)
        return last_non_zero;

    // inter block whose only coded coefficient is at position 0: re-evaluate it
    // on its own against dropping the block entirely
    if(last_non_zero == 0 && start_i == 0){
        int best_level= 0;
        int best_score= dc * dc;

        for(i=0; i<coeff_count[0]; i++){
            int level= coeff[i][0];
            int alevel= FFABS(level);
            int unquant_coeff, score, distortion;

            if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
                    unquant_coeff= (alevel*qmul + qadd)>>3;
            }else{ //MPEG1
                    unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
                    unquant_coeff =   (unquant_coeff - 1) | 1;
            }
            unquant_coeff = (unquant_coeff + 4) >> 3;
            unquant_coeff<<= 3 + 3;

            distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
            level+=64;
            if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
            else                    score= distortion + esc_length*lambda;

            if(score < best_score){
                best_score= score;
                best_level= level - 64;
            }
        }
        block[0]= best_level;
        s->coded_score[n] = best_score - dc*dc;
        if(best_level == 0) return -1;
        else                return last_non_zero;
    }

    // walk the chosen chain backwards, writing levels at their permuted positions
    i= last_i;
    av_assert2(last_level);

    block[ perm_scantable[last_non_zero] ]= last_level;
    i -= last_run + 1;

    for(; i>start_i; i -= run_tab[i] + 1){
        block[ perm_scantable[i-1] ]= level_tab[i];
    }

    return last_non_zero;
}
4039
// Uncomment to compile the statistics/timing code in dct_quantize_refine().
//#define REFINE_STATS 1
/*
 * Table of 8x8 cosine basis patterns, filled by build_basis().
 * The first index is the coefficient position after the IDCT permutation,
 * the second index addresses the 64 samples of the pattern.
 * dct_quantize_refine() treats basis[0][0] == 0 as "not built yet".
 */
static int16_t basis[64][64];
4042
4043 static void build_basis(uint8_t *perm){
4044     int i, j, x, y;
4045     emms_c();
4046     for(i=0; i<8; i++){
4047         for(j=0; j<8; j++){
4048             for(y=0; y<8; y++){
4049                 for(x=0; x<8; x++){
4050                     double s= 0.25*(1<<BASIS_SHIFT);
4051                     int index= 8*i + j;
4052                     int perm_index= perm[index];
4053                     if(i==0) s*= sqrt(0.5);
4054                     if(j==0) s*= sqrt(0.5);
4055                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4056                 }
4057             }
4058         }
4059     }
4060 }
4061
4062 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4063                         int16_t *block, int16_t *weight, int16_t *orig,
4064                         int n, int qscale){
4065     int16_t rem[64];
4066     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4067     const uint8_t *scantable= s->intra_scantable.scantable;
4068     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4069 //    unsigned int threshold1, threshold2;
4070 //    int bias=0;
4071     int run_tab[65];
4072     int prev_run=0;
4073     int prev_level=0;
4074     int qmul, qadd, start_i, last_non_zero, i, dc;
4075     uint8_t * length;
4076     uint8_t * last_length;
4077     int lambda;
4078     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4079 #ifdef REFINE_STATS
4080 static int count=0;
4081 static int after_last=0;
4082 static int to_zero=0;
4083 static int from_zero=0;
4084 static int raise=0;
4085 static int lower=0;
4086 static int messed_sign=0;
4087 #endif
4088
4089     if(basis[0][0] == 0)
4090         build_basis(s->idsp.idct_permutation);
4091
4092     qmul= qscale*2;
4093     qadd= (qscale-1)|1;
4094     if (s->mb_intra) {
4095         if (!s->h263_aic) {
4096             if (n < 4)
4097                 q = s->y_dc_scale;
4098             else
4099                 q = s->c_dc_scale;
4100         } else{
4101             /* For AIC we skip quant/dequant of INTRADC */
4102             q = 1;
4103             qadd=0;
4104         }
4105         q <<= RECON_SHIFT-3;
4106         /* note: block[0] is assumed to be positive */
4107         dc= block[0]*q;
4108 //        block[0] = (block[0] + (q >> 1)) / q;
4109         start_i = 1;
4110 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4111 //            bias= 1<<(QMAT_SHIFT-1);
4112         length     = s->intra_ac_vlc_length;
4113         last_length= s->intra_ac_vlc_last_length;
4114     } else {
4115         dc= 0;
4116         start_i = 0;
4117         length     = s->inter_ac_vlc_length;
4118         last_length= s->inter_ac_vlc_last_length;
4119     }
4120     last_non_zero = s->block_last_index[n];
4121
4122 #ifdef REFINE_STATS
4123 {START_TIMER
4124 #endif
4125     dc += (1<<(RECON_SHIFT-1));
4126     for(i=0; i<64; i++){
4127         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4128     }
4129 #ifdef REFINE_STATS
4130 STOP_TIMER("memset rem[]")}
4131 #endif
4132     sum=0;
4133     for(i=0; i<64; i++){
4134         int one= 36;
4135         int qns=4;
4136         int w;
4137
4138         w= FFABS(weight[i]) + qns*one;
4139         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4140
4141         weight[i] = w;
4142 //        w=weight[i] = (63*qns + (w/2)) / w;
4143
4144         av_assert2(w>0);
4145         av_assert2(w<(1<<6));
4146         sum += w*w;
4147     }
4148     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4149 #ifdef REFINE_STATS
4150 {START_TIMER
4151 #endif
4152     run=0;
4153     rle_index=0;
4154     for(i=start_i; i<=last_non_zero; i++){
4155         int j= perm_scantable[i];
4156         const int level= block[j];
4157         int coeff;
4158
4159         if(level){
4160             if(level<0) coeff= qmul*level - qadd;
4161             else        coeff= qmul*level + qadd;
4162             run_tab[rle_index++]=run;
4163             run=0;
4164
4165             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4166         }else{
4167             run++;
4168         }
4169     }
4170 #ifdef REFINE_STATS
4171 if(last_non_zero>0){
4172 STOP_TIMER("init rem[]")
4173 }
4174 }
4175
4176 {START_TIMER
4177 #endif
4178     for(;;){
4179         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4180         int best_coeff=0;
4181         int best_change=0;
4182         int run2, best_unquant_change=0, analyze_gradient;
4183 #ifdef REFINE_STATS
4184 {START_TIMER
4185 #endif
4186         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4187
4188         if(analyze_gradient){
4189 #ifdef REFINE_STATS
4190 {START_TIMER
4191 #endif
4192             for(i=0; i<64; i++){
4193                 int w= weight[i];
4194
4195                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4196             }
4197 #ifdef REFINE_STATS
4198 STOP_TIMER("rem*w*w")}
4199 {START_TIMER
4200 #endif
4201             s->fdsp.fdct(d1);
4202 #ifdef REFINE_STATS
4203 STOP_TIMER("dct")}
4204 #endif
4205         }
4206
4207         if(start_i){
4208             const int level= block[0];
4209             int change, old_coeff;
4210
4211             av_assert2(s->mb_intra);
4212
4213             old_coeff= q*level;
4214
4215             for(change=-1; change<=1; change+=2){
4216                 int new_level= level + change;
4217                 int score, new_coeff;
4218
4219                 new_coeff= q*new_level;
4220                 if(new_coeff >= 2048 || new_coeff < 0)
4221                     continue;
4222
4223                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4224                                                   new_coeff - old_coeff);
4225                 if(score<best_score){
4226                     best_score= score;
4227                     best_coeff= 0;
4228                     best_change= change;
4229                     best_unquant_change= new_coeff - old_coeff;
4230                 }
4231             }
4232         }
4233
4234         run=0;
4235         rle_index=0;
4236         run2= run_tab[rle_index++];
4237         prev_level=0;
4238         prev_run=0;
4239
4240         for(i=start_i; i<64; i++){
4241             int j= perm_scantable[i];
4242             const int level= block[j];
4243             int change, old_coeff;
4244
4245             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4246                 break;
4247
4248             if(level){
4249                 if(level<0) old_coeff= qmul*level - qadd;
4250                 else        old_coeff= qmul*level + qadd;
4251                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4252             }else{
4253                 old_coeff=0;
4254                 run2--;
4255                 av_assert2(run2>=0 || i >= last_non_zero );
4256             }
4257
4258             for(change=-1; change<=1; change+=2){
4259                 int new_level= level + change;
4260                 int score, new_coeff, unquant_change;
4261
4262                 score=0;
4263                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4264                    continue;
4265
4266                 if(new_level){
4267                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4268                     else            new_coeff= qmul*new_level + qadd;
4269                     if(new_coeff >= 2048 || new_coeff <= -2048)
4270                         continue;
4271                     //FIXME check for overflow
4272
4273                     if(level){
4274                         if(level < 63 && level > -63){
4275                             if(i < last_non_zero)
4276                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4277                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4278                             else
4279                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4280                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4281                         }
4282                     }else{
4283                         av_assert2(FFABS(new_level)==1);
4284
4285                         if(analyze_gradient){
4286                             int g= d1[ scantable[i] ];
4287                             if(g && (g^new_level) >= 0)
4288                                 continue;
4289                         }
4290
4291                         if(i < last_non_zero){
4292                             int next_i= i + run2 + 1;
4293                             int next_level= block[ perm_scantable[next_i] ] + 64;
4294
4295                             if(next_level&(~127))
4296                                 next_level= 0;
4297
4298                             if(next_i < last_non_zero)
4299                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4300                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4301                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4302                             else
4303                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4304                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4305                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4306                         }else{
4307                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4308                             if(prev_level){
4309                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4310                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4311                             }
4312                         }
4313                     }
4314                 }else{
4315                     new_coeff=0;
4316                     av_assert2(FFABS(level)==1);
4317
4318                     if(i < last_non_zero){
4319                         int next_i= i + run2 + 1;
4320                         int next_level= block[ perm_scantable[next_i] ] + 64;
4321
4322                         if(next_level&(~127))
4323                             next_level= 0;
4324
4325                         if(next_i < last_non_zero)
4326                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4327                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4328                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4329                         else
4330                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4331                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4332                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4333                     }else{
4334                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4335                         if(prev_level){
4336                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4337                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4338                         }
4339                     }
4340                 }
4341
4342                 score *= lambda;
4343
4344                 unquant_change= new_coeff - old_coeff;
4345                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4346
4347                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4348                                                    unquant_change);
4349                 if(score<best_score){
4350                     best_score= score;
4351                     best_coeff= i;
4352                     best_change= change;
4353                     best_unquant_change= unquant_change;
4354                 }
4355             }
4356             if(level){
4357                 prev_level= level + 64;
4358                 if(prev_level&(~127))
4359                     prev_level= 0;
4360                 prev_run= run;
4361                 run=0;
4362             }else{
4363                 run++;
4364             }
4365         }
4366 #ifdef REFINE_STATS
4367 STOP_TIMER("iterative step")}
4368 #endif
4369
4370         if(best_change){
4371             int j= perm_scantable[ best_coeff ];
4372
4373             block[j] += best_change;
4374
4375             if(best_coeff > last_non_zero){
4376                 last_non_zero= best_coeff;
4377                 av_assert2(block[j]);
4378 #ifdef REFINE_STATS
4379 after_last++;
4380 #endif
4381             }else{
4382 #ifdef REFINE_STATS
4383 if(block[j]){
4384     if(block[j] - best_change){
4385         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4386             raise++;
4387         }else{
4388             lower++;
4389         }
4390     }else{
4391         from_zero++;
4392     }
4393 }else{
4394     to_zero++;
4395 }
4396 #endif
4397                 for(; last_non_zero>=start_i; last_non_zero--){
4398                     if(block[perm_scantable[last_non_zero]])
4399                         break;
4400                 }
4401             }
4402 #ifdef REFINE_STATS
4403 count++;
4404 if(256*256*256*64 % count == 0){
4405     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4406 }
4407 #endif
4408             run=0;
4409             rle_index=0;
4410             for(i=start_i; i<=last_non_zero; i++){
4411                 int j= perm_scantable[i];
4412                 const int level= block[j];
4413
4414                  if(level){
4415                      run_tab[rle_index++]=run;
4416                      run=0;
4417                  }else{
4418                      run++;
4419                  }
4420             }
4421
4422             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4423         }else{
4424             break;
4425         }
4426     }
4427 #ifdef REFINE_STATS
4428 if(last_non_zero>0){
4429 STOP_TIMER("iterative search")
4430 }
4431 }
4432 #endif
4433
4434     return last_non_zero;
4435 }
4436
4437 int ff_dct_quantize_c(MpegEncContext *s,
4438                         int16_t *block, int n,
4439                         int qscale, int *overflow)
4440 {
4441     int i, j, level, last_non_zero, q, start_i;
4442     const int *qmat;
4443     const uint8_t *scantable= s->intra_scantable.scantable;
4444     int bias;
4445     int max=0;
4446     unsigned int threshold1, threshold2;
4447
4448     s->fdsp.fdct(block);
4449
4450     if(s->dct_error_sum)
4451         s->denoise_dct(s, block);
4452
4453     if (s->mb_intra) {
4454         if (!s->h263_aic) {
4455             if (n < 4)
4456                 q = s->y_dc_scale;
4457             else
4458                 q = s->c_dc_scale;
4459             q = q << 3;
4460         } else
4461             /* For AIC we skip quant/dequant of INTRADC */
4462             q = 1 << 3;
4463
4464         /* note: block[0] is assumed to be positive */
4465         block[0] = (block[0] + (q >> 1)) / q;
4466         start_i = 1;
4467         last_non_zero = 0;
4468         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4469         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4470     } else {
4471         start_i = 0;
4472         last_non_zero = -1;
4473         qmat = s->q_inter_matrix[qscale];
4474         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4475     }
4476     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4477     threshold2= (threshold1<<1);
4478     for(i=63;i>=start_i;i--) {
4479         j = scantable[i];
4480         level = block[j] * qmat[j];
4481
4482         if(((unsigned)(level+threshold1))>threshold2){
4483             last_non_zero = i;
4484             break;
4485         }else{
4486             block[j]=0;
4487         }
4488     }
4489     for(i=start_i; i<=last_non_zero; i++) {
4490         j = scantable[i];
4491         level = block[j] * qmat[j];
4492
4493 //        if(   bias+level >= (1<<QMAT_SHIFT)
4494 //           || bias-level >= (1<<QMAT_SHIFT)){
4495         if(((unsigned)(level+threshold1))>threshold2){
4496             if(level>0){
4497                 level= (bias + level)>>QMAT_SHIFT;
4498                 block[j]= level;
4499             }else{
4500                 level= (bias - level)>>QMAT_SHIFT;
4501                 block[j]= -level;
4502             }
4503             max |=level;
4504         }else{
4505             block[j]=0;
4506         }
4507     }
4508     *overflow= s->max_qcoeff < max; //overflow might have happened
4509
4510     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4511     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4512         ff_block_permute(block, s->idsp.idct_permutation,
4513                          scantable, last_non_zero);
4514
4515     return last_non_zero;
4516 }
4517
/* Byte offset of a field inside MpegEncContext, for use in the option tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flags shared by the options below: video parameter, encoding parameter. */
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/*
 * Private options of the H.263 encoder. Each entry is an integer option
 * stored in the named MpegEncContext field; FF_MPV_COMMON_OPTS appends the
 * options shared by the encoders in this file, and the { NULL } entry
 * terminates the table.
 */
static const AVOption h263_options[] = {
    { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
    { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
    { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
    FF_MPV_COMMON_OPTS
    { NULL },
};
4527
/* AVClass of the H.263 encoder's private context; exposes h263_options. */
static const AVClass h263_class = {
    .class_name = "H.263 encoder",
    .item_name  = av_default_item_name,
    .option     = h263_options,
    .version    = LIBAVUTIL_VERSION_INT,
};
4534
/*
 * Codec entry for the H.263 encoder. It uses the shared ff_mpv_encode_*
 * callbacks with an MpegEncContext as private data and lists YUV420P as the
 * only supported pixel format.
 */
AVCodec ff_h263_encoder = {
    .name           = "h263",
    .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_H263,
    .priv_data_size = sizeof(MpegEncContext),
    .init           = ff_mpv_encode_init,
    .encode2        = ff_mpv_encode_picture,
    .close          = ff_mpv_encode_end,
    .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
    .priv_class     = &h263_class,
};
4547
4548 static const AVOption h263p_options[] = {
4549     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4550     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4551     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4552     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4553     FF_MPV_COMMON_OPTS
4554     { NULL },
4555 };
4556 static const AVClass h263p_class = {
4557     .class_name = "H.263p encoder",
4558     .item_name  = av_default_item_name,
4559     .option     = h263p_options,
4560     .version    = LIBAVUTIL_VERSION_INT,
4561 };
4562
4563 AVCodec ff_h263p_encoder = {
4564     .name           = "h263p",
4565     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4566     .type           = AVMEDIA_TYPE_VIDEO,
4567     .id             = AV_CODEC_ID_H263P,
4568     .priv_data_size = sizeof(MpegEncContext),
4569     .init           = ff_mpv_encode_init,
4570     .encode2        = ff_mpv_encode_picture,
4571     .close          = ff_mpv_encode_end,
4572     .capabilities   = CODEC_CAP_SLICE_THREADS,
4573     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4574     .priv_class     = &h263p_class,
4575 };
4576
4577 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4578
4579 AVCodec ff_msmpeg4v2_encoder = {
4580     .name           = "msmpeg4v2",
4581     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4582     .type           = AVMEDIA_TYPE_VIDEO,
4583     .id             = AV_CODEC_ID_MSMPEG4V2,
4584     .priv_data_size = sizeof(MpegEncContext),
4585     .init           = ff_mpv_encode_init,
4586     .encode2        = ff_mpv_encode_picture,
4587     .close          = ff_mpv_encode_end,
4588     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4589     .priv_class     = &msmpeg4v2_class,
4590 };
4591
4592 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4593
4594 AVCodec ff_msmpeg4v3_encoder = {
4595     .name           = "msmpeg4",
4596     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4597     .type           = AVMEDIA_TYPE_VIDEO,
4598     .id             = AV_CODEC_ID_MSMPEG4V3,
4599     .priv_data_size = sizeof(MpegEncContext),
4600     .init           = ff_mpv_encode_init,
4601     .encode2        = ff_mpv_encode_picture,
4602     .close          = ff_mpv_encode_end,
4603     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4604     .priv_class     = &msmpeg4v3_class,
4605 };
4606
4607 FF_MPV_GENERIC_CLASS(wmv1)
4608
4609 AVCodec ff_wmv1_encoder = {
4610     .name           = "wmv1",
4611     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4612     .type           = AVMEDIA_TYPE_VIDEO,
4613     .id             = AV_CODEC_ID_WMV1,
4614     .priv_data_size = sizeof(MpegEncContext),
4615     .init           = ff_mpv_encode_init,
4616     .encode2        = ff_mpv_encode_picture,
4617     .close          = ff_mpv_encode_end,
4618     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4619     .priv_class     = &wmv1_class,
4620 };