]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge commit 'e352b293712ff7cbde67eba3ce3f8510b037de09'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "pixblockdsp.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60 #include "sp5x.h"
61
62 #define QUANT_BIAS_SHIFT 8
63
64 #define QMAT_SHIFT_MMX 16
65 #define QMAT_SHIFT 21
66
67 static int encode_picture(MpegEncContext *s, int picture_number);
68 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
69 static int sse_mb(MpegEncContext *s);
70 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
71 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
72
73 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
74 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
75
76 const AVOption ff_mpv_generic_options[] = {
77     FF_MPV_COMMON_OPTS
78     { NULL },
79 };
80
81 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
82                        uint16_t (*qmat16)[2][64],
83                        const uint16_t *quant_matrix,
84                        int bias, int qmin, int qmax, int intra)
85 {
86     FDCTDSPContext *fdsp = &s->fdsp;
87     int qscale;
88     int shift = 0;
89
90     for (qscale = qmin; qscale <= qmax; qscale++) {
91         int i;
92         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
93 #if CONFIG_FAANDCT
94             fdsp->fdct == ff_faandct            ||
95 #endif /* CONFIG_FAANDCT */
96             fdsp->fdct == ff_jpeg_fdct_islow_10) {
97             for (i = 0; i < 64; i++) {
98                 const int j = s->idsp.idct_permutation[i];
99                 int64_t den = (int64_t) qscale * quant_matrix[j];
100                 /* 16 <= qscale * quant_matrix[i] <= 7905
101                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
102                  *             19952 <=              x  <= 249205026
103                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
104                  *           3444240 >= (1 << 36) / (x) >= 275 */
105
106                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
107             }
108         } else if (fdsp->fdct == ff_fdct_ifast) {
109             for (i = 0; i < 64; i++) {
110                 const int j = s->idsp.idct_permutation[i];
111                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
112                 /* 16 <= qscale * quant_matrix[i] <= 7905
113                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
114                  *             19952 <=              x  <= 249205026
115                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
116                  *           3444240 >= (1 << 36) / (x) >= 275 */
117
118                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
119             }
120         } else {
121             for (i = 0; i < 64; i++) {
122                 const int j = s->idsp.idct_permutation[i];
123                 int64_t den = (int64_t) qscale * quant_matrix[j];
124                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
125                  * Assume x = qscale * quant_matrix[i]
126                  * So             16 <=              x  <= 7905
127                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
128                  * so          32768 >= (1 << 19) / (x) >= 67 */
129                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
130                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
131                 //                    (qscale * quant_matrix[i]);
132                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
133
134                 if (qmat16[qscale][0][i] == 0 ||
135                     qmat16[qscale][0][i] == 128 * 256)
136                     qmat16[qscale][0][i] = 128 * 256 - 1;
137                 qmat16[qscale][1][i] =
138                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
139                                 qmat16[qscale][0][i]);
140             }
141         }
142
143         for (i = intra; i < 64; i++) {
144             int64_t max = 8191;
145             if (fdsp->fdct == ff_fdct_ifast) {
146                 max = (8191LL * ff_aanscales[i]) >> 14;
147             }
148             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
149                 shift++;
150             }
151         }
152     }
153     if (shift) {
154         av_log(NULL, AV_LOG_INFO,
155                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
156                QMAT_SHIFT - shift);
157     }
158 }
159
160 static inline void update_qscale(MpegEncContext *s)
161 {
162     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
163                 (FF_LAMBDA_SHIFT + 7);
164     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
165
166     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
167                  FF_LAMBDA_SHIFT;
168 }
169
170 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
171 {
172     int i;
173
174     if (matrix) {
175         put_bits(pb, 1, 1);
176         for (i = 0; i < 64; i++) {
177             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
178         }
179     } else
180         put_bits(pb, 1, 0);
181 }
182
183 /**
184  * init s->current_picture.qscale_table from s->lambda_table
185  */
186 void ff_init_qscale_tab(MpegEncContext *s)
187 {
188     int8_t * const qscale_table = s->current_picture.qscale_table;
189     int i;
190
191     for (i = 0; i < s->mb_num; i++) {
192         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
193         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
194         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
195                                                   s->avctx->qmax);
196     }
197 }
198
199 static void update_duplicate_context_after_me(MpegEncContext *dst,
200                                               MpegEncContext *src)
201 {
202 #define COPY(a) dst->a= src->a
203     COPY(pict_type);
204     COPY(current_picture);
205     COPY(f_code);
206     COPY(b_code);
207     COPY(qscale);
208     COPY(lambda);
209     COPY(lambda2);
210     COPY(picture_in_gop_number);
211     COPY(gop_picture_number);
212     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
213     COPY(progressive_frame);    // FIXME don't set in encode_header
214     COPY(partitioned_frame);    // FIXME don't set in encode_header
215 #undef COPY
216 }
217
218 /**
219  * Set the given MpegEncContext to defaults for encoding.
220  * the changed fields will not depend upon the prior state of the MpegEncContext.
221  */
222 static void mpv_encode_defaults(MpegEncContext *s)
223 {
224     int i;
225     ff_mpv_common_defaults(s);
226
227     for (i = -16; i < 16; i++) {
228         default_fcode_tab[i + MAX_MV] = 1;
229     }
230     s->me.mv_penalty = default_mv_penalty;
231     s->fcode_tab     = default_fcode_tab;
232
233     s->input_picture_number  = 0;
234     s->picture_in_gop_number = 0;
235 }
236
237 av_cold int ff_dct_encode_init(MpegEncContext *s) {
238     if (ARCH_X86)
239         ff_dct_encode_init_x86(s);
240
241     if (CONFIG_H263_ENCODER)
242         ff_h263dsp_init(&s->h263dsp);
243     if (!s->dct_quantize)
244         s->dct_quantize = ff_dct_quantize_c;
245     if (!s->denoise_dct)
246         s->denoise_dct  = denoise_dct_c;
247     s->fast_dct_quantize = s->dct_quantize;
248     if (s->avctx->trellis)
249         s->dct_quantize  = dct_quantize_trellis_c;
250
251     return 0;
252 }
253
254 /* init video encoder */
255 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
256 {
257     MpegEncContext *s = avctx->priv_data;
258     int i, ret, format_supported;
259
260     mpv_encode_defaults(s);
261
262     switch (avctx->codec_id) {
263     case AV_CODEC_ID_MPEG2VIDEO:
264         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
265             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
266             av_log(avctx, AV_LOG_ERROR,
267                    "only YUV420 and YUV422 are supported\n");
268             return -1;
269         }
270         break;
271     case AV_CODEC_ID_MJPEG:
272     case AV_CODEC_ID_AMV:
273         format_supported = 0;
274         /* JPEG color space */
275         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
276             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
277             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
278             (avctx->color_range == AVCOL_RANGE_JPEG &&
279              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
280               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
281               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
282             format_supported = 1;
283         /* MPEG color space */
284         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
285                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
286                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
287                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
288             format_supported = 1;
289
290         if (!format_supported) {
291             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
292             return -1;
293         }
294         break;
295     default:
296         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
297             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
298             return -1;
299         }
300     }
301
302     switch (avctx->pix_fmt) {
303     case AV_PIX_FMT_YUVJ444P:
304     case AV_PIX_FMT_YUV444P:
305         s->chroma_format = CHROMA_444;
306         break;
307     case AV_PIX_FMT_YUVJ422P:
308     case AV_PIX_FMT_YUV422P:
309         s->chroma_format = CHROMA_422;
310         break;
311     case AV_PIX_FMT_YUVJ420P:
312     case AV_PIX_FMT_YUV420P:
313     default:
314         s->chroma_format = CHROMA_420;
315         break;
316     }
317
318     s->bit_rate = avctx->bit_rate;
319     s->width    = avctx->width;
320     s->height   = avctx->height;
321     if (avctx->gop_size > 600 &&
322         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
323         av_log(avctx, AV_LOG_WARNING,
324                "keyframe interval too large!, reducing it from %d to %d\n",
325                avctx->gop_size, 600);
326         avctx->gop_size = 600;
327     }
328     s->gop_size     = avctx->gop_size;
329     s->avctx        = avctx;
330     s->flags        = avctx->flags;
331     s->flags2       = avctx->flags2;
332     if (avctx->max_b_frames > MAX_B_FRAMES) {
333         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
334                "is %d.\n", MAX_B_FRAMES);
335         avctx->max_b_frames = MAX_B_FRAMES;
336     }
337     s->max_b_frames = avctx->max_b_frames;
338     s->codec_id     = avctx->codec->id;
339     s->strict_std_compliance = avctx->strict_std_compliance;
340     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
341     s->mpeg_quant         = avctx->mpeg_quant;
342     s->rtp_mode           = !!avctx->rtp_payload_size;
343     s->intra_dc_precision = avctx->intra_dc_precision;
344
345     // workaround some differences between how applications specify dc precission
346     if (s->intra_dc_precision < 0) {
347         s->intra_dc_precision += 8;
348     } else if (s->intra_dc_precision >= 8)
349         s->intra_dc_precision -= 8;
350
351     if (s->intra_dc_precision < 0) {
352         av_log(avctx, AV_LOG_ERROR,
353                 "intra dc precision must be positive, note some applications use"
354                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
355         return AVERROR(EINVAL);
356     }
357
358     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
359         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
360         return AVERROR(EINVAL);
361     }
362     s->user_specified_pts = AV_NOPTS_VALUE;
363
364     if (s->gop_size <= 1) {
365         s->intra_only = 1;
366         s->gop_size   = 12;
367     } else {
368         s->intra_only = 0;
369     }
370
371     s->me_method = avctx->me_method;
372
373     /* Fixed QSCALE */
374     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
375
376 #if FF_API_MPV_OPT
377     FF_DISABLE_DEPRECATION_WARNINGS
378     if (avctx->border_masking != 0.0)
379         s->border_masking = avctx->border_masking;
380     FF_ENABLE_DEPRECATION_WARNINGS
381 #endif
382
383     s->adaptive_quant = (s->avctx->lumi_masking ||
384                          s->avctx->dark_masking ||
385                          s->avctx->temporal_cplx_masking ||
386                          s->avctx->spatial_cplx_masking  ||
387                          s->avctx->p_masking      ||
388                          s->border_masking ||
389                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
390                         !s->fixed_qscale;
391
392     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
393
394     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
395         switch(avctx->codec_id) {
396         case AV_CODEC_ID_MPEG1VIDEO:
397         case AV_CODEC_ID_MPEG2VIDEO:
398             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
399             break;
400         case AV_CODEC_ID_MPEG4:
401         case AV_CODEC_ID_MSMPEG4V1:
402         case AV_CODEC_ID_MSMPEG4V2:
403         case AV_CODEC_ID_MSMPEG4V3:
404             if       (avctx->rc_max_rate >= 15000000) {
405                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
406             } else if(avctx->rc_max_rate >=  2000000) {
407                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
408             } else if(avctx->rc_max_rate >=   384000) {
409                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
410             } else
411                 avctx->rc_buffer_size = 40;
412             avctx->rc_buffer_size *= 16384;
413             break;
414         }
415         if (avctx->rc_buffer_size) {
416             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
417         }
418     }
419
420     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
421         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
422         return -1;
423     }
424
425     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
426         av_log(avctx, AV_LOG_INFO,
427                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
428     }
429
430     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
431         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
432         return -1;
433     }
434
435     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
436         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
437         return -1;
438     }
439
440     if (avctx->rc_max_rate &&
441         avctx->rc_max_rate == avctx->bit_rate &&
442         avctx->rc_max_rate != avctx->rc_min_rate) {
443         av_log(avctx, AV_LOG_INFO,
444                "impossible bitrate constraints, this will fail\n");
445     }
446
447     if (avctx->rc_buffer_size &&
448         avctx->bit_rate * (int64_t)avctx->time_base.num >
449             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
450         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
451         return -1;
452     }
453
454     if (!s->fixed_qscale &&
455         avctx->bit_rate * av_q2d(avctx->time_base) >
456             avctx->bit_rate_tolerance) {
457         av_log(avctx, AV_LOG_WARNING,
458                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
459         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
460     }
461
462     if (s->avctx->rc_max_rate &&
463         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
464         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
465          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
466         90000LL * (avctx->rc_buffer_size - 1) >
467             s->avctx->rc_max_rate * 0xFFFFLL) {
468         av_log(avctx, AV_LOG_INFO,
469                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
470                "specified vbv buffer is too large for the given bitrate!\n");
471     }
472
473     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
474         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
475         s->codec_id != AV_CODEC_ID_FLV1) {
476         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
477         return -1;
478     }
479
480     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
481         av_log(avctx, AV_LOG_ERROR,
482                "OBMC is only supported with simple mb decision\n");
483         return -1;
484     }
485
486     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
487         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
488         return -1;
489     }
490
491     if (s->max_b_frames                    &&
492         s->codec_id != AV_CODEC_ID_MPEG4      &&
493         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
494         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
495         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
496         return -1;
497     }
498     if (s->max_b_frames < 0) {
499         av_log(avctx, AV_LOG_ERROR,
500                "max b frames must be 0 or positive for mpegvideo based encoders\n");
501         return -1;
502     }
503
504     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
505          s->codec_id == AV_CODEC_ID_H263  ||
506          s->codec_id == AV_CODEC_ID_H263P) &&
507         (avctx->sample_aspect_ratio.num > 255 ||
508          avctx->sample_aspect_ratio.den > 255)) {
509         av_log(avctx, AV_LOG_WARNING,
510                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
511                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
512         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
513                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
514     }
515
516     if ((s->codec_id == AV_CODEC_ID_H263  ||
517          s->codec_id == AV_CODEC_ID_H263P) &&
518         (avctx->width  > 2048 ||
519          avctx->height > 1152 )) {
520         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
521         return -1;
522     }
523     if ((s->codec_id == AV_CODEC_ID_H263  ||
524          s->codec_id == AV_CODEC_ID_H263P) &&
525         ((avctx->width &3) ||
526          (avctx->height&3) )) {
527         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
528         return -1;
529     }
530
531     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
532         (avctx->width  > 4095 ||
533          avctx->height > 4095 )) {
534         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
535         return -1;
536     }
537
538     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
539         (avctx->width  > 16383 ||
540          avctx->height > 16383 )) {
541         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
542         return -1;
543     }
544
545     if (s->codec_id == AV_CODEC_ID_RV10 &&
546         (avctx->width &15 ||
547          avctx->height&15 )) {
548         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
549         return AVERROR(EINVAL);
550     }
551
552     if (s->codec_id == AV_CODEC_ID_RV20 &&
553         (avctx->width &3 ||
554          avctx->height&3 )) {
555         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
556         return AVERROR(EINVAL);
557     }
558
559     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
560          s->codec_id == AV_CODEC_ID_WMV2) &&
561          avctx->width & 1) {
562          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
563          return -1;
564     }
565
566     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
567         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
568         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
569         return -1;
570     }
571
572     // FIXME mpeg2 uses that too
573     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
574                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
575         av_log(avctx, AV_LOG_ERROR,
576                "mpeg2 style quantization not supported by codec\n");
577         return -1;
578     }
579
580     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
581         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
582         return -1;
583     }
584
585     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
586         s->avctx->mb_decision != FF_MB_DECISION_RD) {
587         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
588         return -1;
589     }
590
591     if (s->avctx->scenechange_threshold < 1000000000 &&
592         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
593         av_log(avctx, AV_LOG_ERROR,
594                "closed gop with scene change detection are not supported yet, "
595                "set threshold to 1000000000\n");
596         return -1;
597     }
598
599     if (s->flags & CODEC_FLAG_LOW_DELAY) {
600         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
601             av_log(avctx, AV_LOG_ERROR,
602                   "low delay forcing is only available for mpeg2\n");
603             return -1;
604         }
605         if (s->max_b_frames != 0) {
606             av_log(avctx, AV_LOG_ERROR,
607                    "b frames cannot be used with low delay\n");
608             return -1;
609         }
610     }
611
612     if (s->q_scale_type == 1) {
613         if (avctx->qmax > 12) {
614             av_log(avctx, AV_LOG_ERROR,
615                    "non linear quant only supports qmax <= 12 currently\n");
616             return -1;
617         }
618     }
619
620     if (s->avctx->thread_count > 1         &&
621         s->codec_id != AV_CODEC_ID_MPEG4      &&
622         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
623         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
624         s->codec_id != AV_CODEC_ID_MJPEG      &&
625         (s->codec_id != AV_CODEC_ID_H263P)) {
626         av_log(avctx, AV_LOG_ERROR,
627                "multi threaded encoding not supported by codec\n");
628         return -1;
629     }
630
631     if (s->avctx->thread_count < 1) {
632         av_log(avctx, AV_LOG_ERROR,
633                "automatic thread number detection not supported by codec, "
634                "patch welcome\n");
635         return -1;
636     }
637
638     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
639         s->rtp_mode = 1;
640
641     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
642         s->h263_slice_structured = 1;
643
644     if (!avctx->time_base.den || !avctx->time_base.num) {
645         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
646         return -1;
647     }
648
649     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
650         av_log(avctx, AV_LOG_INFO,
651                "notice: b_frame_strategy only affects the first pass\n");
652         avctx->b_frame_strategy = 0;
653     }
654
655     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
656     if (i > 1) {
657         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
658         avctx->time_base.den /= i;
659         avctx->time_base.num /= i;
660         //return -1;
661     }
662
663     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
664         // (a + x * 3 / 8) / x
665         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
666         s->inter_quant_bias = 0;
667     } else {
668         s->intra_quant_bias = 0;
669         // (a - x / 4) / x
670         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
671     }
672
673     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
674         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
675         return AVERROR(EINVAL);
676     }
677
678     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
679         s->intra_quant_bias = avctx->intra_quant_bias;
680     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
681         s->inter_quant_bias = avctx->inter_quant_bias;
682
683     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
684
685     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
686         s->avctx->time_base.den > (1 << 16) - 1) {
687         av_log(avctx, AV_LOG_ERROR,
688                "timebase %d/%d not supported by MPEG 4 standard, "
689                "the maximum admitted value for the timebase denominator "
690                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
691                (1 << 16) - 1);
692         return -1;
693     }
694     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
695
696     switch (avctx->codec->id) {
697     case AV_CODEC_ID_MPEG1VIDEO:
698         s->out_format = FMT_MPEG1;
699         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
700         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
701         break;
702     case AV_CODEC_ID_MPEG2VIDEO:
703         s->out_format = FMT_MPEG1;
704         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
705         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
706         s->rtp_mode   = 1;
707         break;
708     case AV_CODEC_ID_MJPEG:
709     case AV_CODEC_ID_AMV:
710         s->out_format = FMT_MJPEG;
711         s->intra_only = 1; /* force intra only for jpeg */
712         if (!CONFIG_MJPEG_ENCODER ||
713             ff_mjpeg_encode_init(s) < 0)
714             return -1;
715         avctx->delay = 0;
716         s->low_delay = 1;
717         break;
718     case AV_CODEC_ID_H261:
719         if (!CONFIG_H261_ENCODER)
720             return -1;
721         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
722             av_log(avctx, AV_LOG_ERROR,
723                    "The specified picture size of %dx%d is not valid for the "
724                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
725                     s->width, s->height);
726             return -1;
727         }
728         s->out_format = FMT_H261;
729         avctx->delay  = 0;
730         s->low_delay  = 1;
731         break;
732     case AV_CODEC_ID_H263:
733         if (!CONFIG_H263_ENCODER)
734             return -1;
735         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
736                              s->width, s->height) == 8) {
737             av_log(avctx, AV_LOG_ERROR,
738                    "The specified picture size of %dx%d is not valid for "
739                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
740                    "352x288, 704x576, and 1408x1152. "
741                    "Try H.263+.\n", s->width, s->height);
742             return -1;
743         }
744         s->out_format = FMT_H263;
745         avctx->delay  = 0;
746         s->low_delay  = 1;
747         break;
748     case AV_CODEC_ID_H263P:
749         s->out_format = FMT_H263;
750         s->h263_plus  = 1;
751         /* Fx */
752         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
753         s->modified_quant  = s->h263_aic;
754         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
755         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
756
757         /* /Fx */
758         /* These are just to be sure */
759         avctx->delay = 0;
760         s->low_delay = 1;
761         break;
762     case AV_CODEC_ID_FLV1:
763         s->out_format      = FMT_H263;
764         s->h263_flv        = 2; /* format = 1; 11-bit codes */
765         s->unrestricted_mv = 1;
766         s->rtp_mode  = 0; /* don't allow GOB */
767         avctx->delay = 0;
768         s->low_delay = 1;
769         break;
770     case AV_CODEC_ID_RV10:
771         s->out_format = FMT_H263;
772         avctx->delay  = 0;
773         s->low_delay  = 1;
774         break;
775     case AV_CODEC_ID_RV20:
776         s->out_format      = FMT_H263;
777         avctx->delay       = 0;
778         s->low_delay       = 1;
779         s->modified_quant  = 1;
780         s->h263_aic        = 1;
781         s->h263_plus       = 1;
782         s->loop_filter     = 1;
783         s->unrestricted_mv = 0;
784         break;
785     case AV_CODEC_ID_MPEG4:
786         s->out_format      = FMT_H263;
787         s->h263_pred       = 1;
788         s->unrestricted_mv = 1;
789         s->low_delay       = s->max_b_frames ? 0 : 1;
790         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
791         break;
792     case AV_CODEC_ID_MSMPEG4V2:
793         s->out_format      = FMT_H263;
794         s->h263_pred       = 1;
795         s->unrestricted_mv = 1;
796         s->msmpeg4_version = 2;
797         avctx->delay       = 0;
798         s->low_delay       = 1;
799         break;
800     case AV_CODEC_ID_MSMPEG4V3:
801         s->out_format        = FMT_H263;
802         s->h263_pred         = 1;
803         s->unrestricted_mv   = 1;
804         s->msmpeg4_version   = 3;
805         s->flipflop_rounding = 1;
806         avctx->delay         = 0;
807         s->low_delay         = 1;
808         break;
809     case AV_CODEC_ID_WMV1:
810         s->out_format        = FMT_H263;
811         s->h263_pred         = 1;
812         s->unrestricted_mv   = 1;
813         s->msmpeg4_version   = 4;
814         s->flipflop_rounding = 1;
815         avctx->delay         = 0;
816         s->low_delay         = 1;
817         break;
818     case AV_CODEC_ID_WMV2:
819         s->out_format        = FMT_H263;
820         s->h263_pred         = 1;
821         s->unrestricted_mv   = 1;
822         s->msmpeg4_version   = 5;
823         s->flipflop_rounding = 1;
824         avctx->delay         = 0;
825         s->low_delay         = 1;
826         break;
827     default:
828         return -1;
829     }
830
831     avctx->has_b_frames = !s->low_delay;
832
833     s->encoding = 1;
834
835     s->progressive_frame    =
836     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
837                                                 CODEC_FLAG_INTERLACED_ME) ||
838                                 s->alternate_scan);
839
840     /* init */
841     ff_mpv_idct_init(s);
842     if (ff_mpv_common_init(s) < 0)
843         return -1;
844
845     ff_fdctdsp_init(&s->fdsp, avctx);
846     ff_me_cmp_init(&s->mecc, avctx);
847     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
848     ff_pixblockdsp_init(&s->pdsp, avctx);
849     ff_qpeldsp_init(&s->qdsp);
850
851     s->avctx->coded_frame = s->current_picture.f;
852
853     if (s->msmpeg4_version) {
854         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
855                           2 * 2 * (MAX_LEVEL + 1) *
856                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
857     }
858     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
859
860     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
861     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
862     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
863     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
864     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
865     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
866     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
867                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
868     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
869                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
870
871     if (s->avctx->noise_reduction) {
872         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
873                           2 * 64 * sizeof(uint16_t), fail);
874     }
875
876     ff_dct_encode_init(s);
877
878     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
879         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
880
881     s->quant_precision = 5;
882
883     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
884     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
885
886     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
887         ff_h261_encode_init(s);
888     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
889         ff_h263_encode_init(s);
890     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
891         ff_msmpeg4_encode_init(s);
892     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
893         && s->out_format == FMT_MPEG1)
894         ff_mpeg1_encode_init(s);
895
896     /* init q matrix */
897     for (i = 0; i < 64; i++) {
898         int j = s->idsp.idct_permutation[i];
899         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
900             s->mpeg_quant) {
901             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
902             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
903         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
904             s->intra_matrix[j] =
905             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
906         } else {
907             /* mpeg1/2 */
908             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
909             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
910         }
911         if (s->avctx->intra_matrix)
912             s->intra_matrix[j] = s->avctx->intra_matrix[i];
913         if (s->avctx->inter_matrix)
914             s->inter_matrix[j] = s->avctx->inter_matrix[i];
915     }
916
917     /* precompute matrix */
918     /* for mjpeg, we do include qscale in the matrix */
919     if (s->out_format != FMT_MJPEG) {
920         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
921                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
922                           31, 1);
923         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
924                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
925                           31, 0);
926     }
927
928     if (ff_rate_control_init(s) < 0)
929         return -1;
930
931 #if FF_API_ERROR_RATE
932     FF_DISABLE_DEPRECATION_WARNINGS
933     if (avctx->error_rate)
934         s->error_rate = avctx->error_rate;
935     FF_ENABLE_DEPRECATION_WARNINGS;
936 #endif
937
938 #if FF_API_NORMALIZE_AQP
939     FF_DISABLE_DEPRECATION_WARNINGS
940     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
941         s->mpv_flags |= FF_MPV_FLAG_NAQ;
942     FF_ENABLE_DEPRECATION_WARNINGS;
943 #endif
944
945 #if FF_API_MV0
946     FF_DISABLE_DEPRECATION_WARNINGS
947     if (avctx->flags & CODEC_FLAG_MV0)
948         s->mpv_flags |= FF_MPV_FLAG_MV0;
949     FF_ENABLE_DEPRECATION_WARNINGS
950 #endif
951
952 #if FF_API_MPV_OPT
953     FF_DISABLE_DEPRECATION_WARNINGS
954     if (avctx->rc_qsquish != 0.0)
955         s->rc_qsquish = avctx->rc_qsquish;
956     if (avctx->rc_qmod_amp != 0.0)
957         s->rc_qmod_amp = avctx->rc_qmod_amp;
958     if (avctx->rc_qmod_freq)
959         s->rc_qmod_freq = avctx->rc_qmod_freq;
960     if (avctx->rc_buffer_aggressivity != 1.0)
961         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
962     if (avctx->rc_initial_cplx != 0.0)
963         s->rc_initial_cplx = avctx->rc_initial_cplx;
964     if (avctx->lmin)
965         s->lmin = avctx->lmin;
966     if (avctx->lmax)
967         s->lmax = avctx->lmax;
968
969     if (avctx->rc_eq) {
970         av_freep(&s->rc_eq);
971         s->rc_eq = av_strdup(avctx->rc_eq);
972         if (!s->rc_eq)
973             return AVERROR(ENOMEM);
974     }
975     FF_ENABLE_DEPRECATION_WARNINGS
976 #endif
977
978     if (avctx->b_frame_strategy == 2) {
979         for (i = 0; i < s->max_b_frames + 2; i++) {
980             s->tmp_frames[i] = av_frame_alloc();
981             if (!s->tmp_frames[i])
982                 return AVERROR(ENOMEM);
983
984             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
985             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
986             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
987
988             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
989             if (ret < 0)
990                 return ret;
991         }
992     }
993
994     return 0;
995 fail:
996     ff_mpv_encode_end(avctx);
997     return AVERROR_UNKNOWN;
998 }
999
1000 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
1001 {
1002     MpegEncContext *s = avctx->priv_data;
1003     int i;
1004
1005     ff_rate_control_uninit(s);
1006
1007     ff_mpv_common_end(s);
1008     if (CONFIG_MJPEG_ENCODER &&
1009         s->out_format == FMT_MJPEG)
1010         ff_mjpeg_encode_close(s);
1011
1012     av_freep(&avctx->extradata);
1013
1014     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1015         av_frame_free(&s->tmp_frames[i]);
1016
1017     ff_free_picture_tables(&s->new_picture);
1018     ff_mpeg_unref_picture(s, &s->new_picture);
1019
1020     av_freep(&s->avctx->stats_out);
1021     av_freep(&s->ac_stats);
1022
1023     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
1024     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1025     s->q_chroma_intra_matrix=   NULL;
1026     s->q_chroma_intra_matrix16= NULL;
1027     av_freep(&s->q_intra_matrix);
1028     av_freep(&s->q_inter_matrix);
1029     av_freep(&s->q_intra_matrix16);
1030     av_freep(&s->q_inter_matrix16);
1031     av_freep(&s->input_picture);
1032     av_freep(&s->reordered_input_picture);
1033     av_freep(&s->dct_offset);
1034
1035     return 0;
1036 }
1037
1038 static int get_sae(uint8_t *src, int ref, int stride)
1039 {
1040     int x,y;
1041     int acc = 0;
1042
1043     for (y = 0; y < 16; y++) {
1044         for (x = 0; x < 16; x++) {
1045             acc += FFABS(src[x + y * stride] - ref);
1046         }
1047     }
1048
1049     return acc;
1050 }
1051
1052 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1053                            uint8_t *ref, int stride)
1054 {
1055     int x, y, w, h;
1056     int acc = 0;
1057
1058     w = s->width  & ~15;
1059     h = s->height & ~15;
1060
1061     for (y = 0; y < h; y += 16) {
1062         for (x = 0; x < w; x += 16) {
1063             int offset = x + y * stride;
1064             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1065                                       stride, 16);
1066             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1067             int sae  = get_sae(src + offset, mean, stride);
1068
1069             acc += sae + 500 < sad;
1070         }
1071     }
1072     return acc;
1073 }
1074
1075
1076 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1077 {
1078     Picture *pic = NULL;
1079     int64_t pts;
1080     int i, display_picture_number = 0, ret;
1081     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1082                                                  (s->low_delay ? 0 : 1);
1083     int direct = 1;
1084
1085     if (pic_arg) {
1086         pts = pic_arg->pts;
1087         display_picture_number = s->input_picture_number++;
1088
1089         if (pts != AV_NOPTS_VALUE) {
1090             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1091                 int64_t last = s->user_specified_pts;
1092
1093                 if (pts <= last) {
1094                     av_log(s->avctx, AV_LOG_ERROR,
1095                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1096                            pts, last);
1097                     return AVERROR(EINVAL);
1098                 }
1099
1100                 if (!s->low_delay && display_picture_number == 1)
1101                     s->dts_delta = pts - last;
1102             }
1103             s->user_specified_pts = pts;
1104         } else {
1105             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1106                 s->user_specified_pts =
1107                 pts = s->user_specified_pts + 1;
1108                 av_log(s->avctx, AV_LOG_INFO,
1109                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1110                        pts);
1111             } else {
1112                 pts = display_picture_number;
1113             }
1114         }
1115     }
1116
1117     if (pic_arg) {
1118         if (!pic_arg->buf[0] ||
1119             pic_arg->linesize[0] != s->linesize ||
1120             pic_arg->linesize[1] != s->uvlinesize ||
1121             pic_arg->linesize[2] != s->uvlinesize)
1122             direct = 0;
1123         if ((s->width & 15) || (s->height & 15))
1124             direct = 0;
1125         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1126             direct = 0;
1127         if (s->linesize & (STRIDE_ALIGN-1))
1128             direct = 0;
1129
1130         av_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1131                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1132
1133         i = ff_find_unused_picture(s, direct);
1134         if (i < 0)
1135             return i;
1136
1137         pic = &s->picture[i];
1138         pic->reference = 3;
1139
1140         if (direct) {
1141             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1142                 return ret;
1143             if (ff_alloc_picture(s, pic, 1) < 0) {
1144                 return -1;
1145             }
1146         } else {
1147             if (ff_alloc_picture(s, pic, 0) < 0) {
1148                 return -1;
1149             }
1150
1151             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1152                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1153                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1154                 // empty
1155             } else {
1156                 int h_chroma_shift, v_chroma_shift;
1157                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1158                                                  &h_chroma_shift,
1159                                                  &v_chroma_shift);
1160
1161                 for (i = 0; i < 3; i++) {
1162                     int src_stride = pic_arg->linesize[i];
1163                     int dst_stride = i ? s->uvlinesize : s->linesize;
1164                     int h_shift = i ? h_chroma_shift : 0;
1165                     int v_shift = i ? v_chroma_shift : 0;
1166                     int w = s->width  >> h_shift;
1167                     int h = s->height >> v_shift;
1168                     uint8_t *src = pic_arg->data[i];
1169                     uint8_t *dst = pic->f->data[i];
1170                     int vpad = 16;
1171
1172                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1173                         && !s->progressive_sequence
1174                         && FFALIGN(s->height, 32) - s->height > 16)
1175                         vpad = 32;
1176
1177                     if (!s->avctx->rc_buffer_size)
1178                         dst += INPLACE_OFFSET;
1179
1180                     if (src_stride == dst_stride)
1181                         memcpy(dst, src, src_stride * h);
1182                     else {
1183                         int h2 = h;
1184                         uint8_t *dst2 = dst;
1185                         while (h2--) {
1186                             memcpy(dst2, src, w);
1187                             dst2 += dst_stride;
1188                             src += src_stride;
1189                         }
1190                     }
1191                     if ((s->width & 15) || (s->height & (vpad-1))) {
1192                         s->mpvencdsp.draw_edges(dst, dst_stride,
1193                                                 w, h,
1194                                                 16>>h_shift,
1195                                                 vpad>>v_shift,
1196                                                 EDGE_BOTTOM);
1197                     }
1198                 }
1199             }
1200         }
1201         ret = av_frame_copy_props(pic->f, pic_arg);
1202         if (ret < 0)
1203             return ret;
1204
1205         pic->f->display_picture_number = display_picture_number;
1206         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1207     }
1208
1209     /* shift buffer entries */
1210     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1211         s->input_picture[i - 1] = s->input_picture[i];
1212
1213     s->input_picture[encoding_delay] = (Picture*) pic;
1214
1215     return 0;
1216 }
1217
1218 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1219 {
1220     int x, y, plane;
1221     int score = 0;
1222     int64_t score64 = 0;
1223
1224     for (plane = 0; plane < 3; plane++) {
1225         const int stride = p->f->linesize[plane];
1226         const int bw = plane ? 1 : 2;
1227         for (y = 0; y < s->mb_height * bw; y++) {
1228             for (x = 0; x < s->mb_width * bw; x++) {
1229                 int off = p->shared ? 0 : 16;
1230                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1231                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1232                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1233
1234                 switch (FFABS(s->avctx->frame_skip_exp)) {
1235                 case 0: score    =  FFMAX(score, v);          break;
1236                 case 1: score   += FFABS(v);                  break;
1237                 case 2: score64 += v * (int64_t)v;                       break;
1238                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1239                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1240                 }
1241             }
1242         }
1243     }
1244     emms_c();
1245
1246     if (score)
1247         score64 = score;
1248     if (s->avctx->frame_skip_exp < 0)
1249         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1250                       -1.0/s->avctx->frame_skip_exp);
1251
1252     if (score64 < s->avctx->frame_skip_threshold)
1253         return 1;
1254     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1255         return 1;
1256     return 0;
1257 }
1258
1259 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1260 {
1261     AVPacket pkt = { 0 };
1262     int ret, got_output;
1263
1264     av_init_packet(&pkt);
1265     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1266     if (ret < 0)
1267         return ret;
1268
1269     ret = pkt.size;
1270     av_free_packet(&pkt);
1271     return ret;
1272 }
1273
1274 static int estimate_best_b_count(MpegEncContext *s)
1275 {
1276     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1277     AVCodecContext *c = avcodec_alloc_context3(NULL);
1278     const int scale = s->avctx->brd_scale;
1279     int i, j, out_size, p_lambda, b_lambda, lambda2;
1280     int64_t best_rd  = INT64_MAX;
1281     int best_b_count = -1;
1282
1283     av_assert0(scale >= 0 && scale <= 3);
1284
1285     //emms_c();
1286     //s->next_picture_ptr->quality;
1287     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1288     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1289     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1290     if (!b_lambda) // FIXME we should do this somewhere else
1291         b_lambda = p_lambda;
1292     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1293                FF_LAMBDA_SHIFT;
1294
1295     c->width        = s->width  >> scale;
1296     c->height       = s->height >> scale;
1297     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1298     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1299     c->mb_decision  = s->avctx->mb_decision;
1300     c->me_cmp       = s->avctx->me_cmp;
1301     c->mb_cmp       = s->avctx->mb_cmp;
1302     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1303     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1304     c->time_base    = s->avctx->time_base;
1305     c->max_b_frames = s->max_b_frames;
1306
1307     if (avcodec_open2(c, codec, NULL) < 0)
1308         return -1;
1309
1310     for (i = 0; i < s->max_b_frames + 2; i++) {
1311         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1312                                                 s->next_picture_ptr;
1313         uint8_t *data[4];
1314
1315         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1316             pre_input = *pre_input_ptr;
1317             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1318
1319             if (!pre_input.shared && i) {
1320                 data[0] += INPLACE_OFFSET;
1321                 data[1] += INPLACE_OFFSET;
1322                 data[2] += INPLACE_OFFSET;
1323             }
1324
1325             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1326                                        s->tmp_frames[i]->linesize[0],
1327                                        data[0],
1328                                        pre_input.f->linesize[0],
1329                                        c->width, c->height);
1330             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1331                                        s->tmp_frames[i]->linesize[1],
1332                                        data[1],
1333                                        pre_input.f->linesize[1],
1334                                        c->width >> 1, c->height >> 1);
1335             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1336                                        s->tmp_frames[i]->linesize[2],
1337                                        data[2],
1338                                        pre_input.f->linesize[2],
1339                                        c->width >> 1, c->height >> 1);
1340         }
1341     }
1342
1343     for (j = 0; j < s->max_b_frames + 1; j++) {
1344         int64_t rd = 0;
1345
1346         if (!s->input_picture[j])
1347             break;
1348
1349         c->error[0] = c->error[1] = c->error[2] = 0;
1350
1351         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1352         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1353
1354         out_size = encode_frame(c, s->tmp_frames[0]);
1355
1356         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1357
1358         for (i = 0; i < s->max_b_frames + 1; i++) {
1359             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1360
1361             s->tmp_frames[i + 1]->pict_type = is_p ?
1362                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1363             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1364
1365             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1366
1367             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1368         }
1369
1370         /* get the delayed frames */
1371         while (out_size) {
1372             out_size = encode_frame(c, NULL);
1373             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1374         }
1375
1376         rd += c->error[0] + c->error[1] + c->error[2];
1377
1378         if (rd < best_rd) {
1379             best_rd = rd;
1380             best_b_count = j;
1381         }
1382     }
1383
1384     avcodec_close(c);
1385     av_freep(&c);
1386
1387     return best_b_count;
1388 }
1389
1390 static int select_input_picture(MpegEncContext *s)
1391 {
1392     int i, ret;
1393
1394     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1395         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1396     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1397
1398     /* set next picture type & ordering */
1399     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1400         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1401             if (s->picture_in_gop_number < s->gop_size &&
1402                 s->next_picture_ptr &&
1403                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1404                 // FIXME check that te gop check above is +-1 correct
1405                 av_frame_unref(s->input_picture[0]->f);
1406
1407                 ff_vbv_update(s, 0);
1408
1409                 goto no_output_pic;
1410             }
1411         }
1412
1413         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1414             !s->next_picture_ptr || s->intra_only) {
1415             s->reordered_input_picture[0] = s->input_picture[0];
1416             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1417             s->reordered_input_picture[0]->f->coded_picture_number =
1418                 s->coded_picture_number++;
1419         } else {
1420             int b_frames;
1421
1422             if (s->flags & CODEC_FLAG_PASS2) {
1423                 for (i = 0; i < s->max_b_frames + 1; i++) {
1424                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1425
1426                     if (pict_num >= s->rc_context.num_entries)
1427                         break;
1428                     if (!s->input_picture[i]) {
1429                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1430                         break;
1431                     }
1432
1433                     s->input_picture[i]->f->pict_type =
1434                         s->rc_context.entry[pict_num].new_pict_type;
1435                 }
1436             }
1437
1438             if (s->avctx->b_frame_strategy == 0) {
1439                 b_frames = s->max_b_frames;
1440                 while (b_frames && !s->input_picture[b_frames])
1441                     b_frames--;
1442             } else if (s->avctx->b_frame_strategy == 1) {
1443                 for (i = 1; i < s->max_b_frames + 1; i++) {
1444                     if (s->input_picture[i] &&
1445                         s->input_picture[i]->b_frame_score == 0) {
1446                         s->input_picture[i]->b_frame_score =
1447                             get_intra_count(s,
1448                                             s->input_picture[i    ]->f->data[0],
1449                                             s->input_picture[i - 1]->f->data[0],
1450                                             s->linesize) + 1;
1451                     }
1452                 }
1453                 for (i = 0; i < s->max_b_frames + 1; i++) {
1454                     if (!s->input_picture[i] ||
1455                         s->input_picture[i]->b_frame_score - 1 >
1456                             s->mb_num / s->avctx->b_sensitivity)
1457                         break;
1458                 }
1459
1460                 b_frames = FFMAX(0, i - 1);
1461
1462                 /* reset scores */
1463                 for (i = 0; i < b_frames + 1; i++) {
1464                     s->input_picture[i]->b_frame_score = 0;
1465                 }
1466             } else if (s->avctx->b_frame_strategy == 2) {
1467                 b_frames = estimate_best_b_count(s);
1468             } else {
1469                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1470                 b_frames = 0;
1471             }
1472
1473             emms_c();
1474
1475             for (i = b_frames - 1; i >= 0; i--) {
1476                 int type = s->input_picture[i]->f->pict_type;
1477                 if (type && type != AV_PICTURE_TYPE_B)
1478                     b_frames = i;
1479             }
1480             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1481                 b_frames == s->max_b_frames) {
1482                 av_log(s->avctx, AV_LOG_ERROR,
1483                        "warning, too many b frames in a row\n");
1484             }
1485
1486             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1487                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1488                     s->gop_size > s->picture_in_gop_number) {
1489                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1490                 } else {
1491                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1492                         b_frames = 0;
1493                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1494                 }
1495             }
1496
1497             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1498                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1499                 b_frames--;
1500
1501             s->reordered_input_picture[0] = s->input_picture[b_frames];
1502             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1503                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1504             s->reordered_input_picture[0]->f->coded_picture_number =
1505                 s->coded_picture_number++;
1506             for (i = 0; i < b_frames; i++) {
1507                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1508                 s->reordered_input_picture[i + 1]->f->pict_type =
1509                     AV_PICTURE_TYPE_B;
1510                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1511                     s->coded_picture_number++;
1512             }
1513         }
1514     }
1515 no_output_pic:
1516     if (s->reordered_input_picture[0]) {
1517         s->reordered_input_picture[0]->reference =
1518            s->reordered_input_picture[0]->f->pict_type !=
1519                AV_PICTURE_TYPE_B ? 3 : 0;
1520
1521         ff_mpeg_unref_picture(s, &s->new_picture);
1522         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1523             return ret;
1524
1525         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1526             // input is a shared picture, so we can't modify it -> allocate a new
1527             // one and ensure that the shared one is reusable
1528
1529             Picture *pic;
1530             int i = ff_find_unused_picture(s, 0);
1531             if (i < 0)
1532                 return i;
1533             pic = &s->picture[i];
1534
1535             pic->reference = s->reordered_input_picture[0]->reference;
1536             if (ff_alloc_picture(s, pic, 0) < 0) {
1537                 return -1;
1538             }
1539
1540             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1541             if (ret < 0)
1542                 return ret;
1543
1544             /* mark us unused / free shared pic */
1545             av_frame_unref(s->reordered_input_picture[0]->f);
1546             s->reordered_input_picture[0]->shared = 0;
1547
1548             s->current_picture_ptr = pic;
1549         } else {
1550             // input is not a shared pix -> reuse buffer for current_pix
1551             s->current_picture_ptr = s->reordered_input_picture[0];
1552             for (i = 0; i < 4; i++) {
1553                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1554             }
1555         }
1556         ff_mpeg_unref_picture(s, &s->current_picture);
1557         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1558                                        s->current_picture_ptr)) < 0)
1559             return ret;
1560
1561         s->picture_number = s->new_picture.f->display_picture_number;
1562     } else {
1563         ff_mpeg_unref_picture(s, &s->new_picture);
1564     }
1565     return 0;
1566 }
1567
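/* Post-encode housekeeping: pad the edges of the reconstructed reference
 * frame (needed for unrestricted motion vectors) and remember the last
 * picture type / lambda for rate control of the following frames. */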
1568 static void frame_end(MpegEncContext *s)
1569 {
1570     if (s->unrestricted_mv &&
1571         s->current_picture.reference &&
1572         !s->intra_only) {
1573         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1574         int hshift = desc->log2_chroma_w;
1575         int vshift = desc->log2_chroma_h;
1576         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1577                                 s->current_picture.f->linesize[0],
1578                                 s->h_edge_pos, s->v_edge_pos,
1579                                 EDGE_WIDTH, EDGE_WIDTH,
1580                                 EDGE_TOP | EDGE_BOTTOM);
1581         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1582                                 s->current_picture.f->linesize[1],
1583                                 s->h_edge_pos >> hshift,
1584                                 s->v_edge_pos >> vshift,
1585                                 EDGE_WIDTH >> hshift,
1586                                 EDGE_WIDTH >> vshift,
1587                                 EDGE_TOP | EDGE_BOTTOM);
1588         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1589                                 s->current_picture.f->linesize[2],
1590                                 s->h_edge_pos >> hshift,
1591                                 s->v_edge_pos >> vshift,
1592                                 EDGE_WIDTH >> hshift,
1593                                 EDGE_WIDTH >> vshift,
1594                                 EDGE_TOP | EDGE_BOTTOM);
1595     }
1596
1597     emms_c();
1598
1599     s->last_pict_type                 = s->pict_type;
1600     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1601     if (s->pict_type!= AV_PICTURE_TYPE_B)
1602         s->last_non_b_pict_type = s->pict_type;
1603
1604     s->avctx->coded_frame = s->current_picture_ptr->f;
1605
1606 }
1607
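/* Refresh the noise-reduction offsets: the accumulated DCT error statistics
 * are halved once the sample count exceeds 2^16, and each offset ends up
 * roughly noise_reduction * dct_count / dct_error_sum[i], with rounding. */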
1608 static void update_noise_reduction(MpegEncContext *s)
1609 {
1610     int intra, i;
1611
1612     for (intra = 0; intra < 2; intra++) {
1613         if (s->dct_count[intra] > (1 << 16)) {
1614             for (i = 0; i < 64; i++) {
1615                 s->dct_error_sum[intra][i] >>= 1;
1616             }
1617             s->dct_count[intra] >>= 1;
1618         }
1619
1620         for (i = 0; i < 64; i++) {
1621             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1622                                        s->dct_count[intra] +
1623                                        s->dct_error_sum[intra][i] / 2) /
1624                                       (s->dct_error_sum[intra][i] + 1);
1625         }
1626     }
1627 }
1628
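/* Per-frame setup before encoding: rotate the last/next/current picture
 * references, double the linesizes for field pictures, pick the
 * dequantizer matching the target codec and update noise reduction. */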
1629 static int frame_start(MpegEncContext *s)
1630 {
1631     int ret;
1632
1633     /* mark & release old frames */
1634     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1635         s->last_picture_ptr != s->next_picture_ptr &&
1636         s->last_picture_ptr->f->buf[0]) {
1637         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1638     }
1639
1640     s->current_picture_ptr->f->pict_type = s->pict_type;
1641     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1642
1643     ff_mpeg_unref_picture(s, &s->current_picture);
1644     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1645                                    s->current_picture_ptr)) < 0)
1646         return ret;
1647
1648     if (s->pict_type != AV_PICTURE_TYPE_B) {
1649         s->last_picture_ptr = s->next_picture_ptr;
1650         if (!s->droppable)
1651             s->next_picture_ptr = s->current_picture_ptr;
1652     }
1653
1654     if (s->last_picture_ptr) {
1655         ff_mpeg_unref_picture(s, &s->last_picture);
1656         if (s->last_picture_ptr->f->buf[0] &&
1657             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1658                                        s->last_picture_ptr)) < 0)
1659             return ret;
1660     }
1661     if (s->next_picture_ptr) {
1662         ff_mpeg_unref_picture(s, &s->next_picture);
1663         if (s->next_picture_ptr->f->buf[0] &&
1664             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1665                                        s->next_picture_ptr)) < 0)
1666             return ret;
1667     }
1668
1669     if (s->picture_structure!= PICT_FRAME) {
1670         int i;
1671         for (i = 0; i < 4; i++) {
1672             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1673                 s->current_picture.f->data[i] +=
1674                     s->current_picture.f->linesize[i];
1675             }
1676             s->current_picture.f->linesize[i] *= 2;
1677             s->last_picture.f->linesize[i]    *= 2;
1678             s->next_picture.f->linesize[i]    *= 2;
1679         }
1680     }
1681
1682     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1683         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1684         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1685     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1686         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1687         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1688     } else {
1689         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1690         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1691     }
1692
1693     if (s->dct_error_sum) {
1694         av_assert2(s->avctx->noise_reduction && s->encoding);
1695         update_noise_reduction(s);
1696     }
1697
1698     return 0;
1699 }
1700
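/* Main entry point for encoding one frame: queue and reorder the input,
 * run encode_picture() (retrying with a higher lambda if the VBV would
 * overflow), append stuffing bits, patch vbv_delay for MPEG-1/2 CBR and
 * fill in the output packet. */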
1701 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1702                           const AVFrame *pic_arg, int *got_packet)
1703 {
1704     MpegEncContext *s = avctx->priv_data;
1705     int i, stuffing_count, ret;
1706     int context_count = s->slice_context_count;
1707
1708     s->picture_in_gop_number++;
1709
1710     if (load_input_picture(s, pic_arg) < 0)
1711         return -1;
1712
1713     if (select_input_picture(s) < 0) {
1714         return -1;
1715     }
1716
1717     /* output? */
1718     if (s->new_picture.f->data[0]) {
1719         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1720         int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - FF_INPUT_BUFFER_PADDING_SIZE
1721                                               :
1722                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1723         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
1724             return ret;
1725         if (s->mb_info) {
1726             s->mb_info_ptr = av_packet_new_side_data(pkt,
1727                                  AV_PKT_DATA_H263_MB_INFO,
1728                                  s->mb_width*s->mb_height*12);
1729             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1730         }
1731
1732         for (i = 0; i < context_count; i++) {
1733             int start_y = s->thread_context[i]->start_mb_y;
1734             int   end_y = s->thread_context[i]->  end_mb_y;
1735             int h       = s->mb_height;
1736             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1737             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1738
1739             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1740         }
1741
1742         s->pict_type = s->new_picture.f->pict_type;
1743         //emms_c();
1744         ret = frame_start(s);
1745         if (ret < 0)
1746             return ret;
1747 vbv_retry:
1748         ret = encode_picture(s, s->picture_number);
1749         if (growing_buffer) {
1750             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1751             pkt->data = s->pb.buf;
1752             pkt->size = avctx->internal->byte_buffer_size;
1753         }
1754         if (ret < 0)
1755             return -1;
1756
1757         avctx->header_bits = s->header_bits;
1758         avctx->mv_bits     = s->mv_bits;
1759         avctx->misc_bits   = s->misc_bits;
1760         avctx->i_tex_bits  = s->i_tex_bits;
1761         avctx->p_tex_bits  = s->p_tex_bits;
1762         avctx->i_count     = s->i_count;
1763         // FIXME f/b_count in avctx
1764         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1765         avctx->skip_count  = s->skip_count;
1766
1767         frame_end(s);
1768
1769         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1770             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1771
1772         if (avctx->rc_buffer_size) {
1773             RateControlContext *rcc = &s->rc_context;
1774             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1775
1776             if (put_bits_count(&s->pb) > max_size &&
1777                 s->lambda < s->lmax) {
1778                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1779                                        (s->qscale + 1) / s->qscale);
1780                 if (s->adaptive_quant) {
1781                     int i;
1782                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1783                         s->lambda_table[i] =
1784                             FFMAX(s->lambda_table[i] + 1,
1785                                   s->lambda_table[i] * (s->qscale + 1) /
1786                                   s->qscale);
1787                 }
1788                 s->mb_skipped = 0;        // done in frame_start()
1789                 // no_rounding was toggled in encode_picture(), so we must undo it here
1790                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1791                     if (s->flipflop_rounding          ||
1792                         s->codec_id == AV_CODEC_ID_H263P ||
1793                         s->codec_id == AV_CODEC_ID_MPEG4)
1794                         s->no_rounding ^= 1;
1795                 }
1796                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1797                     s->time_base       = s->last_time_base;
1798                     s->last_non_b_time = s->time - s->pp_time;
1799                 }
1800                 for (i = 0; i < context_count; i++) {
1801                     PutBitContext *pb = &s->thread_context[i]->pb;
1802                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1803                 }
1804                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1805                 goto vbv_retry;
1806             }
1807
1808             av_assert0(s->avctx->rc_max_rate);
1809         }
1810
1811         if (s->flags & CODEC_FLAG_PASS1)
1812             ff_write_pass1_stats(s);
1813
1814         for (i = 0; i < 4; i++) {
1815             s->current_picture_ptr->f->error[i] =
1816             s->current_picture.f->error[i] =
1817                 s->current_picture.error[i];
1818             avctx->error[i] += s->current_picture_ptr->f->error[i];
1819         }
1820
1821         if (s->flags & CODEC_FLAG_PASS1)
1822             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1823                    avctx->i_tex_bits + avctx->p_tex_bits ==
1824                        put_bits_count(&s->pb));
1825         flush_put_bits(&s->pb);
1826         s->frame_bits  = put_bits_count(&s->pb);
1827
1828         stuffing_count = ff_vbv_update(s, s->frame_bits);
1829         s->stuffing_bits = 8*stuffing_count;
1830         if (stuffing_count) {
1831             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1832                     stuffing_count + 50) {
1833                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1834                 return -1;
1835             }
1836
1837             switch (s->codec_id) {
1838             case AV_CODEC_ID_MPEG1VIDEO:
1839             case AV_CODEC_ID_MPEG2VIDEO:
1840                 while (stuffing_count--) {
1841                     put_bits(&s->pb, 8, 0);
1842                 }
1843             break;
1844             case AV_CODEC_ID_MPEG4:
1845                 put_bits(&s->pb, 16, 0);
1846                 put_bits(&s->pb, 16, 0x1C3);
1847                 stuffing_count -= 4;
1848                 while (stuffing_count--) {
1849                     put_bits(&s->pb, 8, 0xFF);
1850                 }
1851             break;
1852             default:
1853                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1854             }
1855             flush_put_bits(&s->pb);
1856             s->frame_bits  = put_bits_count(&s->pb);
1857         }
1858
1859         /* update mpeg1/2 vbv_delay for CBR */
1860         if (s->avctx->rc_max_rate                          &&
1861             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1862             s->out_format == FMT_MPEG1                     &&
1863             90000LL * (avctx->rc_buffer_size - 1) <=
1864                 s->avctx->rc_max_rate * 0xFFFFLL) {
1865             int vbv_delay, min_delay;
1866             double inbits  = s->avctx->rc_max_rate *
1867                              av_q2d(s->avctx->time_base);
1868             int    minbits = s->frame_bits - 8 *
1869                              (s->vbv_delay_ptr - s->pb.buf - 1);
1870             double bits    = s->rc_context.buffer_index + minbits - inbits;
1871
1872             if (bits < 0)
1873                 av_log(s->avctx, AV_LOG_ERROR,
1874                        "Internal error, negative bits\n");
1875
1876             assert(s->repeat_first_field == 0);
1877
1878             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1879             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1880                         s->avctx->rc_max_rate;
1881
1882             vbv_delay = FFMAX(vbv_delay, min_delay);
1883
1884             av_assert0(vbv_delay < 0xFFFF);
1885
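            /* Patch the 16-bit vbv_delay straight into the already written
             * picture header: the top 3 bits go into the first byte, the
             * middle 8 into the second and the low 5 into the third. */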
1886             s->vbv_delay_ptr[0] &= 0xF8;
1887             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1888             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1889             s->vbv_delay_ptr[2] &= 0x07;
1890             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1891             avctx->vbv_delay     = vbv_delay * 300;
1892         }
1893         s->total_bits     += s->frame_bits;
1894         avctx->frame_bits  = s->frame_bits;
1895
1896         pkt->pts = s->current_picture.f->pts;
1897         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1898             if (!s->current_picture.f->coded_picture_number)
1899                 pkt->dts = pkt->pts - s->dts_delta;
1900             else
1901                 pkt->dts = s->reordered_pts;
1902             s->reordered_pts = pkt->pts;
1903         } else
1904             pkt->dts = pkt->pts;
1905         if (s->current_picture.f->key_frame)
1906             pkt->flags |= AV_PKT_FLAG_KEY;
1907         if (s->mb_info)
1908             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1909     } else {
1910         s->frame_bits = 0;
1911     }
1912
1913     /* release non-reference frames */
1914     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1915         if (!s->picture[i].reference)
1916             ff_mpeg_unref_picture(s, &s->picture[i]);
1917     }
1918
1919     av_assert1((s->frame_bits & 7) == 0);
1920
1921     pkt->size = s->frame_bits / 8;
1922     *got_packet = !!pkt->size;
1923     return 0;
1924 }
1925
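/* Zero out a block whose only nonzero AC coefficients are isolated +-1
 * levels: each run is scored via tab[] and the block is cleared when the
 * total score stays below the threshold (a negative threshold also allows
 * the DC coefficient to be eliminated). */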
1926 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1927                                                 int n, int threshold)
1928 {
1929     static const char tab[64] = {
1930         3, 2, 2, 1, 1, 1, 1, 1,
1931         1, 1, 1, 1, 1, 1, 1, 1,
1932         1, 1, 1, 1, 1, 1, 1, 1,
1933         0, 0, 0, 0, 0, 0, 0, 0,
1934         0, 0, 0, 0, 0, 0, 0, 0,
1935         0, 0, 0, 0, 0, 0, 0, 0,
1936         0, 0, 0, 0, 0, 0, 0, 0,
1937         0, 0, 0, 0, 0, 0, 0, 0
1938     };
1939     int score = 0;
1940     int run = 0;
1941     int i;
1942     int16_t *block = s->block[n];
1943     const int last_index = s->block_last_index[n];
1944     int skip_dc;
1945
1946     if (threshold < 0) {
1947         skip_dc = 0;
1948         threshold = -threshold;
1949     } else
1950         skip_dc = 1;
1951
1952     /* Is everything we could set to zero already zero? */
1953     if (last_index <= skip_dc - 1)
1954         return;
1955
1956     for (i = 0; i <= last_index; i++) {
1957         const int j = s->intra_scantable.permutated[i];
1958         const int level = FFABS(block[j]);
1959         if (level == 1) {
1960             if (skip_dc && i == 0)
1961                 continue;
1962             score += tab[run];
1963             run = 0;
1964         } else if (level > 1) {
1965             return;
1966         } else {
1967             run++;
1968         }
1969     }
1970     if (score >= threshold)
1971         return;
1972     for (i = skip_dc; i <= last_index; i++) {
1973         const int j = s->intra_scantable.permutated[i];
1974         block[j] = 0;
1975     }
1976     if (block[0])
1977         s->block_last_index[n] = 0;
1978     else
1979         s->block_last_index[n] = -1;
1980 }
1981
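/* Clamp the quantized coefficients to the range the target codec can
 * represent ([min_qcoeff, max_qcoeff]); for intra macroblocks the DC
 * coefficient is skipped, and a warning is logged in simple MB-decision
 * mode when clipping occurred. */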
1982 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1983                                int last_index)
1984 {
1985     int i;
1986     const int maxlevel = s->max_qcoeff;
1987     const int minlevel = s->min_qcoeff;
1988     int overflow = 0;
1989
1990     if (s->mb_intra) {
1991         i = 1; // skip clipping of intra dc
1992     } else
1993         i = 0;
1994
1995     for (; i <= last_index; i++) {
1996         const int j = s->intra_scantable.permutated[i];
1997         int level = block[j];
1998
1999         if (level > maxlevel) {
2000             level = maxlevel;
2001             overflow++;
2002         } else if (level < minlevel) {
2003             level = minlevel;
2004             overflow++;
2005         }
2006
2007         block[j] = level;
2008     }
2009
2010     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2011         av_log(s->avctx, AV_LOG_INFO,
2012                "warning, clipping %d dct coefficients to %d..%d\n",
2013                overflow, minlevel, maxlevel);
2014 }
2015
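/* Compute a perceptual weight for each of the 64 pixels from the activity
 * of its 3x3 neighbourhood (proportional to the local standard deviation);
 * used by the noise-shaping quantizer refinement. */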
2016 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
2017 {
2018     int x, y;
2019     // FIXME optimize
2020     for (y = 0; y < 8; y++) {
2021         for (x = 0; x < 8; x++) {
2022             int x2, y2;
2023             int sum = 0;
2024             int sqr = 0;
2025             int count = 0;
2026
2027             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
2028                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
2029                     int v = ptr[x2 + y2 * stride];
2030                     sum += v;
2031                     sqr += v * v;
2032                     count++;
2033                 }
2034             }
2035             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
2036         }
2037     }
2038 }
2039
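/* Encode one macroblock: apply adaptive quantization, fetch (or motion
 * compensate) the source pixels, optionally pick interlaced DCT, run the
 * forward DCT + quantization with optional noise shaping and coefficient
 * elimination, and finally hand the blocks to the codec-specific
 * bitstream writer. */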
2040 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2041                                                 int motion_x, int motion_y,
2042                                                 int mb_block_height,
2043                                                 int mb_block_width,
2044                                                 int mb_block_count)
2045 {
2046     int16_t weight[12][64];
2047     int16_t orig[12][64];
2048     const int mb_x = s->mb_x;
2049     const int mb_y = s->mb_y;
2050     int i;
2051     int skip_dct[12];
2052     int dct_offset = s->linesize * 8; // default for progressive frames
2053     int uv_dct_offset = s->uvlinesize * 8;
2054     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2055     ptrdiff_t wrap_y, wrap_c;
2056
2057     for (i = 0; i < mb_block_count; i++)
2058         skip_dct[i] = s->skipdct;
2059
2060     if (s->adaptive_quant) {
2061         const int last_qp = s->qscale;
2062         const int mb_xy = mb_x + mb_y * s->mb_stride;
2063
2064         s->lambda = s->lambda_table[mb_xy];
2065         update_qscale(s);
2066
2067         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2068             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2069             s->dquant = s->qscale - last_qp;
2070
2071             if (s->out_format == FMT_H263) {
2072                 s->dquant = av_clip(s->dquant, -2, 2);
2073
2074                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2075                     if (!s->mb_intra) {
2076                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2077                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2078                                 s->dquant = 0;
2079                         }
2080                         if (s->mv_type == MV_TYPE_8X8)
2081                             s->dquant = 0;
2082                     }
2083                 }
2084             }
2085         }
2086         ff_set_qscale(s, last_qp + s->dquant);
2087     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2088         ff_set_qscale(s, s->qscale + s->dquant);
2089
2090     wrap_y = s->linesize;
2091     wrap_c = s->uvlinesize;
2092     ptr_y  = s->new_picture.f->data[0] +
2093              (mb_y * 16 * wrap_y)              + mb_x * 16;
2094     ptr_cb = s->new_picture.f->data[1] +
2095              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2096     ptr_cr = s->new_picture.f->data[2] +
2097              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2098
2099     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2100         uint8_t *ebuf = s->edge_emu_buffer + 36 * wrap_y;
2101         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2102         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2103         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2104                                  wrap_y, wrap_y,
2105                                  16, 16, mb_x * 16, mb_y * 16,
2106                                  s->width, s->height);
2107         ptr_y = ebuf;
2108         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2109                                  wrap_c, wrap_c,
2110                                  mb_block_width, mb_block_height,
2111                                  mb_x * mb_block_width, mb_y * mb_block_height,
2112                                  cw, ch);
2113         ptr_cb = ebuf + 16 * wrap_y;
2114         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2115                                  wrap_c, wrap_c,
2116                                  mb_block_width, mb_block_height,
2117                                  mb_x * mb_block_width, mb_y * mb_block_height,
2118                                  cw, ch);
2119         ptr_cr = ebuf + 16 * wrap_y + 16;
2120     }
2121
2122     if (s->mb_intra) {
2123         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2124             int progressive_score, interlaced_score;
2125
2126             s->interlaced_dct = 0;
2127             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2128                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2129                                                      NULL, wrap_y, 8) - 400;
2130
2131             if (progressive_score > 0) {
2132                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2133                                                         NULL, wrap_y * 2, 8) +
2134                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2135                                                         NULL, wrap_y * 2, 8);
2136                 if (progressive_score > interlaced_score) {
2137                     s->interlaced_dct = 1;
2138
2139                     dct_offset = wrap_y;
2140                     uv_dct_offset = wrap_c;
2141                     wrap_y <<= 1;
2142                     if (s->chroma_format == CHROMA_422 ||
2143                         s->chroma_format == CHROMA_444)
2144                         wrap_c <<= 1;
2145                 }
2146             }
2147         }
2148
2149         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2150         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2151         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2152         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2153
2154         if (s->flags & CODEC_FLAG_GRAY) {
2155             skip_dct[4] = 1;
2156             skip_dct[5] = 1;
2157         } else {
2158             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2159             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2160             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2161                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2162                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2163             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2164                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2165                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2166                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2167                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2168                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2169                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2170             }
2171         }
2172     } else {
2173         op_pixels_func (*op_pix)[4];
2174         qpel_mc_func (*op_qpix)[16];
2175         uint8_t *dest_y, *dest_cb, *dest_cr;
2176
2177         dest_y  = s->dest[0];
2178         dest_cb = s->dest[1];
2179         dest_cr = s->dest[2];
2180
2181         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2182             op_pix  = s->hdsp.put_pixels_tab;
2183             op_qpix = s->qdsp.put_qpel_pixels_tab;
2184         } else {
2185             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2186             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2187         }
2188
2189         if (s->mv_dir & MV_DIR_FORWARD) {
2190             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2191                           s->last_picture.f->data,
2192                           op_pix, op_qpix);
2193             op_pix  = s->hdsp.avg_pixels_tab;
2194             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2195         }
2196         if (s->mv_dir & MV_DIR_BACKWARD) {
2197             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2198                           s->next_picture.f->data,
2199                           op_pix, op_qpix);
2200         }
2201
2202         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2203             int progressive_score, interlaced_score;
2204
2205             s->interlaced_dct = 0;
2206             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2207                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2208                                                      ptr_y + wrap_y * 8,
2209                                                      wrap_y, 8) - 400;
2210
2211             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2212                 progressive_score -= 400;
2213
2214             if (progressive_score > 0) {
2215                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2216                                                         wrap_y * 2, 8) +
2217                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2218                                                         ptr_y + wrap_y,
2219                                                         wrap_y * 2, 8);
2220
2221                 if (progressive_score > interlaced_score) {
2222                     s->interlaced_dct = 1;
2223
2224                     dct_offset = wrap_y;
2225                     uv_dct_offset = wrap_c;
2226                     wrap_y <<= 1;
2227                     if (s->chroma_format == CHROMA_422)
2228                         wrap_c <<= 1;
2229                 }
2230             }
2231         }
2232
2233         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2234         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2235         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2236                             dest_y + dct_offset, wrap_y);
2237         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2238                             dest_y + dct_offset + 8, wrap_y);
2239
2240         if (s->flags & CODEC_FLAG_GRAY) {
2241             skip_dct[4] = 1;
2242             skip_dct[5] = 1;
2243         } else {
2244             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2245             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2246             if (!s->chroma_y_shift) { /* 422 */
2247                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2248                                     dest_cb + uv_dct_offset, wrap_c);
2249                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2250                                     dest_cr + uv_dct_offset, wrap_c);
2251             }
2252         }
2253         /* pre quantization */
2254         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2255                 2 * s->qscale * s->qscale) {
2256             // FIXME optimize
2257             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2258                 skip_dct[0] = 1;
2259             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2260                 skip_dct[1] = 1;
2261             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2262                                wrap_y, 8) < 20 * s->qscale)
2263                 skip_dct[2] = 1;
2264             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2265                                wrap_y, 8) < 20 * s->qscale)
2266                 skip_dct[3] = 1;
2267             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2268                 skip_dct[4] = 1;
2269             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2270                 skip_dct[5] = 1;
2271             if (!s->chroma_y_shift) { /* 422 */
2272                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2273                                    dest_cb + uv_dct_offset,
2274                                    wrap_c, 8) < 20 * s->qscale)
2275                     skip_dct[6] = 1;
2276                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2277                                    dest_cr + uv_dct_offset,
2278                                    wrap_c, 8) < 20 * s->qscale)
2279                     skip_dct[7] = 1;
2280             }
2281         }
2282     }
2283
2284     if (s->quantizer_noise_shaping) {
2285         if (!skip_dct[0])
2286             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2287         if (!skip_dct[1])
2288             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2289         if (!skip_dct[2])
2290             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2291         if (!skip_dct[3])
2292             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2293         if (!skip_dct[4])
2294             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2295         if (!skip_dct[5])
2296             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2297         if (!s->chroma_y_shift) { /* 422 */
2298             if (!skip_dct[6])
2299                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2300                                   wrap_c);
2301             if (!skip_dct[7])
2302                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2303                                   wrap_c);
2304         }
2305         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2306     }
2307
2308     /* DCT & quantize */
2309     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2310     {
2311         for (i = 0; i < mb_block_count; i++) {
2312             if (!skip_dct[i]) {
2313                 int overflow;
2314                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2315                 // FIXME we could decide to change the quantizer instead of
2316                 // clipping
2317                 // JS: I don't think that would be a good idea, it could lower
2318                 //     quality instead of improving it. Just INTRADC clipping
2319                 //     deserves changes in the quantizer
2320                 if (overflow)
2321                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2322             } else
2323                 s->block_last_index[i] = -1;
2324         }
2325         if (s->quantizer_noise_shaping) {
2326             for (i = 0; i < mb_block_count; i++) {
2327                 if (!skip_dct[i]) {
2328                     s->block_last_index[i] =
2329                         dct_quantize_refine(s, s->block[i], weight[i],
2330                                             orig[i], i, s->qscale);
2331                 }
2332             }
2333         }
2334
2335         if (s->luma_elim_threshold && !s->mb_intra)
2336             for (i = 0; i < 4; i++)
2337                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2338         if (s->chroma_elim_threshold && !s->mb_intra)
2339             for (i = 4; i < mb_block_count; i++)
2340                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2341
2342         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2343             for (i = 0; i < mb_block_count; i++) {
2344                 if (s->block_last_index[i] == -1)
2345                     s->coded_score[i] = INT_MAX / 256;
2346             }
2347         }
2348     }
2349
2350     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2351         s->block_last_index[4] =
2352         s->block_last_index[5] = 0;
2353         s->block[4][0] =
2354         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2355         if (!s->chroma_y_shift) { /* 422 / 444 */
2356             for (i=6; i<12; i++) {
2357                 s->block_last_index[i] = 0;
2358                 s->block[i][0] = s->block[4][0];
2359             }
2360         }
2361     }
2362
2363     // FIXME: the non-C quantize code returns an incorrect block_last_index
2364     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2365         for (i = 0; i < mb_block_count; i++) {
2366             int j;
2367             if (s->block_last_index[i] > 0) {
2368                 for (j = 63; j > 0; j--) {
2369                     if (s->block[i][s->intra_scantable.permutated[j]])
2370                         break;
2371                 }
2372                 s->block_last_index[i] = j;
2373             }
2374         }
2375     }
2376
2377     /* huffman encode */
2378     switch(s->codec_id){ //FIXME a function pointer could be slightly faster
2379     case AV_CODEC_ID_MPEG1VIDEO:
2380     case AV_CODEC_ID_MPEG2VIDEO:
2381         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2382             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2383         break;
2384     case AV_CODEC_ID_MPEG4:
2385         if (CONFIG_MPEG4_ENCODER)
2386             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2387         break;
2388     case AV_CODEC_ID_MSMPEG4V2:
2389     case AV_CODEC_ID_MSMPEG4V3:
2390     case AV_CODEC_ID_WMV1:
2391         if (CONFIG_MSMPEG4_ENCODER)
2392             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2393         break;
2394     case AV_CODEC_ID_WMV2:
2395         if (CONFIG_WMV2_ENCODER)
2396             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2397         break;
2398     case AV_CODEC_ID_H261:
2399         if (CONFIG_H261_ENCODER)
2400             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2401         break;
2402     case AV_CODEC_ID_H263:
2403     case AV_CODEC_ID_H263P:
2404     case AV_CODEC_ID_FLV1:
2405     case AV_CODEC_ID_RV10:
2406     case AV_CODEC_ID_RV20:
2407         if (CONFIG_H263_ENCODER)
2408             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2409         break;
2410     case AV_CODEC_ID_MJPEG:
2411     case AV_CODEC_ID_AMV:
2412         if (CONFIG_MJPEG_ENCODER)
2413             ff_mjpeg_encode_mb(s, s->block);
2414         break;
2415     default:
2416         av_assert1(0);
2417     }
2418 }
2419
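/* Dispatch on chroma format: 4:2:0 uses 6 blocks, 4:2:2 uses 8 and
 * 4:4:4 uses 12. */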
2420 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2421 {
2422     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2423     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2424     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2425 }
2426
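/* copy_context_before/after_encode() save and restore the parts of the
 * context that the macroblock-mode search in encode_mb_hq() must be able
 * to roll back between candidate encodings. */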
2427 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2428     int i;
2429
2430     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2431
2432     /* mpeg1 */
2433     d->mb_skip_run= s->mb_skip_run;
2434     for(i=0; i<3; i++)
2435         d->last_dc[i] = s->last_dc[i];
2436
2437     /* statistics */
2438     d->mv_bits= s->mv_bits;
2439     d->i_tex_bits= s->i_tex_bits;
2440     d->p_tex_bits= s->p_tex_bits;
2441     d->i_count= s->i_count;
2442     d->f_count= s->f_count;
2443     d->b_count= s->b_count;
2444     d->skip_count= s->skip_count;
2445     d->misc_bits= s->misc_bits;
2446     d->last_bits= 0;
2447
2448     d->mb_skipped= 0;
2449     d->qscale= s->qscale;
2450     d->dquant= s->dquant;
2451
2452     d->esc3_level_length= s->esc3_level_length;
2453 }
2454
2455 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2456     int i;
2457
2458     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2459     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2460
2461     /* mpeg1 */
2462     d->mb_skip_run= s->mb_skip_run;
2463     for(i=0; i<3; i++)
2464         d->last_dc[i] = s->last_dc[i];
2465
2466     /* statistics */
2467     d->mv_bits= s->mv_bits;
2468     d->i_tex_bits= s->i_tex_bits;
2469     d->p_tex_bits= s->p_tex_bits;
2470     d->i_count= s->i_count;
2471     d->f_count= s->f_count;
2472     d->b_count= s->b_count;
2473     d->skip_count= s->skip_count;
2474     d->misc_bits= s->misc_bits;
2475
2476     d->mb_intra= s->mb_intra;
2477     d->mb_skipped= s->mb_skipped;
2478     d->mv_type= s->mv_type;
2479     d->mv_dir= s->mv_dir;
2480     d->pb= s->pb;
2481     if(s->data_partitioning){
2482         d->pb2= s->pb2;
2483         d->tex_pb= s->tex_pb;
2484     }
2485     d->block= s->block;
2486     for(i=0; i<8; i++)
2487         d->block_last_index[i]= s->block_last_index[i];
2488     d->interlaced_dct= s->interlaced_dct;
2489     d->qscale= s->qscale;
2490
2491     d->esc3_level_length= s->esc3_level_length;
2492 }
2493
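/* Try encoding the current macroblock with one candidate mode into a
 * scratch bitstream, compute its rate (and, for FF_MB_DECISION_RD, the
 * rate-distortion score rate*lambda2 + SSE<<FF_LAMBDA_SHIFT) and keep the
 * best candidate seen so far. */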
2494 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2495                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2496                            int *dmin, int *next_block, int motion_x, int motion_y)
2497 {
2498     int score;
2499     uint8_t *dest_backup[3];
2500
2501     copy_context_before_encode(s, backup, type);
2502
2503     s->block= s->blocks[*next_block];
2504     s->pb= pb[*next_block];
2505     if(s->data_partitioning){
2506         s->pb2   = pb2   [*next_block];
2507         s->tex_pb= tex_pb[*next_block];
2508     }
2509
2510     if(*next_block){
2511         memcpy(dest_backup, s->dest, sizeof(s->dest));
2512         s->dest[0] = s->rd_scratchpad;
2513         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2514         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2515         av_assert0(s->linesize >= 32); //FIXME
2516     }
2517
2518     encode_mb(s, motion_x, motion_y);
2519
2520     score= put_bits_count(&s->pb);
2521     if(s->data_partitioning){
2522         score+= put_bits_count(&s->pb2);
2523         score+= put_bits_count(&s->tex_pb);
2524     }
2525
2526     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2527         ff_mpv_decode_mb(s, s->block);
2528
2529         score *= s->lambda2;
2530         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2531     }
2532
2533     if(*next_block){
2534         memcpy(s->dest, dest_backup, sizeof(s->dest));
2535     }
2536
2537     if(score<*dmin){
2538         *dmin= score;
2539         *next_block^=1;
2540
2541         copy_context_after_encode(best, s, type);
2542     }
2543 }
2544
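/* Sum of squared errors between two w x h blocks; the optimized
 * mecc.sse[] functions handle the common 16x16 and 8x8 cases. */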
2545 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2546     uint32_t *sq = ff_square_tab + 256;
2547     int acc=0;
2548     int x,y;
2549
2550     if(w==16 && h==16)
2551         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2552     else if(w==8 && h==8)
2553         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2554
2555     for(y=0; y<h; y++){
2556         for(x=0; x<w; x++){
2557             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2558         }
2559     }
2560
2561     av_assert2(acc>=0);
2562
2563     return acc;
2564 }
2565
2566 static int sse_mb(MpegEncContext *s){
2567     int w= 16;
2568     int h= 16;
2569
2570     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2571     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2572
2573     if(w==16 && h==16)
2574       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2575         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2576                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2577                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2578       }else{
2579         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2580                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2581                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2582       }
2583     else
2584         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2585                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2586                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2587 }
2588
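/* Motion-estimation pre-pass: scans this slice context's macroblocks
 * bottom-up / right-to-left with the pre_dia_size diamond to seed the
 * real search. */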
2589 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2590     MpegEncContext *s= *(void**)arg;
2591
2592
2593     s->me.pre_pass=1;
2594     s->me.dia_size= s->avctx->pre_dia_size;
2595     s->first_slice_line=1;
2596     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2597         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2598             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2599         }
2600         s->first_slice_line=0;
2601     }
2602
2603     s->me.pre_pass=0;
2604
2605     return 0;
2606 }
2607
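/* Full motion-estimation pass for this slice: per macroblock, pick the
 * motion vectors and mb_type (P- or B-frame search) and store them in the
 * context for the later encoding pass. */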
2608 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2609     MpegEncContext *s= *(void**)arg;
2610
2611     ff_check_alignment();
2612
2613     s->me.dia_size= s->avctx->dia_size;
2614     s->first_slice_line=1;
2615     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2616         s->mb_x=0; //for block init below
2617         ff_init_block_index(s);
2618         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2619             s->block_index[0]+=2;
2620             s->block_index[1]+=2;
2621             s->block_index[2]+=2;
2622             s->block_index[3]+=2;
2623
2624             /* compute motion vector & mb_type and store in context */
2625             if(s->pict_type==AV_PICTURE_TYPE_B)
2626                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2627             else
2628                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2629         }
2630         s->first_slice_line=0;
2631     }
2632     return 0;
2633 }
2634
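/* Compute the spatial variance and mean of each 16x16 luma block
 * (roughly sum(pix^2) - sum(pix)^2/256, rescaled); these feed adaptive
 * quantization and rate control. */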
2635 static int mb_var_thread(AVCodecContext *c, void *arg){
2636     MpegEncContext *s= *(void**)arg;
2637     int mb_x, mb_y;
2638
2639     ff_check_alignment();
2640
2641     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2642         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2643             int xx = mb_x * 16;
2644             int yy = mb_y * 16;
2645             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2646             int varc;
2647             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2648
2649             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2650                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2651
2652             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2653             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2654             s->me.mb_var_sum_temp    += varc;
2655         }
2656     }
2657     return 0;
2658 }
2659
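/* Terminate the current slice: merge MPEG-4 data partitions, write the
 * codec-specific stuffing, byte-align the bitstream and account the
 * padding as misc bits for the two-pass statistics. */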
2660 static void write_slice_end(MpegEncContext *s){
2661     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2662         if(s->partitioned_frame){
2663             ff_mpeg4_merge_partitions(s);
2664         }
2665
2666         ff_mpeg4_stuffing(&s->pb);
2667     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2668         ff_mjpeg_encode_stuffing(s);
2669     }
2670
2671     avpriv_align_put_bits(&s->pb);
2672     flush_put_bits(&s->pb);
2673
2674     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2675         s->misc_bits+= get_bits_diff(s);
2676 }
2677
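/* Append one 12-byte record to the AV_PKT_DATA_H263_MB_INFO side data:
 * bit offset of the macroblock, quantizer, GOB number, macroblock address
 * and the predicted motion vector (the second MV pair is unused). */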
2678 static void write_mb_info(MpegEncContext *s)
2679 {
2680     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2681     int offset = put_bits_count(&s->pb);
2682     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2683     int gobn = s->mb_y / s->gob_index;
2684     int pred_x, pred_y;
2685     if (CONFIG_H263_ENCODER)
2686         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2687     bytestream_put_le32(&ptr, offset);
2688     bytestream_put_byte(&ptr, s->qscale);
2689     bytestream_put_byte(&ptr, gobn);
2690     bytestream_put_le16(&ptr, mba);
2691     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2692     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2693     /* 4MV not implemented */
2694     bytestream_put_byte(&ptr, 0); /* hmv2 */
2695     bytestream_put_byte(&ptr, 0); /* vmv2 */
2696 }
2697
2698 static void update_mb_info(MpegEncContext *s, int startcode)
2699 {
2700     if (!s->mb_info)
2701         return;
2702     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2703         s->mb_info_size += 12;
2704         s->prev_mb_info = s->last_mb_info;
2705     }
2706     if (startcode) {
2707         s->prev_mb_info = put_bits_count(&s->pb)/8;
2708         /* This might have incremented mb_info_size above, and we return without
2709          * actually writing any info into that slot yet. But in that case,
2710          * this will be called again after writing the start code,
2711          * actually writing the mb info. */
2712         return;
2713     }
2714
2715     s->last_mb_info = put_bits_count(&s->pb)/8;
2716     if (!s->mb_info_size)
2717         s->mb_info_size += 12;
2718     write_mb_info(s);
2719 }
2720
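/* Per-slice encoding worker: resets the slice statistics and DC
 * predictors, then walks the macroblocks of this slice context, growing
 * the output buffer on demand and emitting GOB / video packet headers
 * when RTP-style packetization is enabled. */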
2721 static int encode_thread(AVCodecContext *c, void *arg){
2722     MpegEncContext *s= *(void**)arg;
2723     int mb_x, mb_y, pdif = 0;
2724     int chr_h= 16>>s->chroma_y_shift;
2725     int i, j;
2726     MpegEncContext best_s, backup_s;
2727     uint8_t bit_buf[2][MAX_MB_BYTES];
2728     uint8_t bit_buf2[2][MAX_MB_BYTES];
2729     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2730     PutBitContext pb[2], pb2[2], tex_pb[2];
2731
2732     ff_check_alignment();
2733
2734     for(i=0; i<2; i++){
2735         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2736         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2737         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2738     }
2739
2740     s->last_bits= put_bits_count(&s->pb);
2741     s->mv_bits=0;
2742     s->misc_bits=0;
2743     s->i_tex_bits=0;
2744     s->p_tex_bits=0;
2745     s->i_count=0;
2746     s->f_count=0;
2747     s->b_count=0;
2748     s->skip_count=0;
2749
2750     for(i=0; i<3; i++){
2751         /* init last dc values */
2752         /* note: quant matrix value (8) is implied here */
2753         s->last_dc[i] = 128 << s->intra_dc_precision;
2754
2755         s->current_picture.error[i] = 0;
2756     }
2757     if(s->codec_id==AV_CODEC_ID_AMV){
2758         s->last_dc[0] = 128*8/13;
2759         s->last_dc[1] = 128*8/14;
2760         s->last_dc[2] = 128*8/14;
2761     }
2762     s->mb_skip_run = 0;
2763     memset(s->last_mv, 0, sizeof(s->last_mv));
2764
2765     s->last_mv_dir = 0;
2766
2767     switch(s->codec_id){
2768     case AV_CODEC_ID_H263:
2769     case AV_CODEC_ID_H263P:
2770     case AV_CODEC_ID_FLV1:
2771         if (CONFIG_H263_ENCODER)
2772             s->gob_index = ff_h263_get_gob_height(s);
2773         break;
2774     case AV_CODEC_ID_MPEG4:
2775         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2776             ff_mpeg4_init_partitions(s);
2777         break;
2778     }
2779
2780     s->resync_mb_x=0;
2781     s->resync_mb_y=0;
2782     s->first_slice_line = 1;
2783     s->ptr_lastgob = s->pb.buf;
2784     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2785         s->mb_x=0;
2786         s->mb_y= mb_y;
2787
2788         ff_set_qscale(s, s->qscale);
2789         ff_init_block_index(s);
2790
2791         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2792             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2793             int mb_type= s->mb_type[xy];
2794 //            int d;
2795             int dmin= INT_MAX;
2796             int dir;
2797
2798             if (   s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES
2799                 && s->slice_context_count == 1
2800                 && s->pb.buf == s->avctx->internal->byte_buffer) {
2801                 int new_size =  s->avctx->internal->byte_buffer_size
2802                               + s->avctx->internal->byte_buffer_size/4
2803                               + s->mb_width*MAX_MB_BYTES;
2804                 int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2805                 int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2806
2807                 uint8_t *new_buffer = NULL;
2808                 int new_buffer_size = 0;
2809
2810                 av_fast_padded_malloc(&new_buffer, &new_buffer_size, new_size);
2811                 if (new_buffer) {
2812                     memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2813                     av_free(s->avctx->internal->byte_buffer);
2814                     s->avctx->internal->byte_buffer      = new_buffer;
2815                     s->avctx->internal->byte_buffer_size = new_buffer_size;
2816                     rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2817                     s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2818                     s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2819                 }
2820             }
2821             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2822                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2823                 return -1;
2824             }
2825             if(s->data_partitioning){
2826                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2827                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2828                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2829                     return -1;
2830                 }
2831             }
2832
2833             s->mb_x = mb_x;
2834             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2835             ff_update_block_index(s);
2836
2837             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2838                 ff_h261_reorder_mb_index(s);
2839                 xy= s->mb_y*s->mb_stride + s->mb_x;
2840                 mb_type= s->mb_type[xy];
2841             }
2842
2843             /* write gob / video packet header  */
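            /* A new packet is considered once roughly rtp_payload_size bytes
             * have been written since the last resync point. The switch below
             * adjusts this per codec: never for H.261, only at GOB boundaries
             * for non-slice-structured H.263, forced at every MB row for
             * MPEG-2 and MJPEG, and never inside an MPEG-1/2 skip run. */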
2844             if(s->rtp_mode){
2845                 int current_packet_size, is_gob_start;
2846
2847                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2848
2849                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2850
2851                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2852
2853                 switch(s->codec_id){
2854                 case AV_CODEC_ID_H261:
2855                     is_gob_start=0;//FIXME
2856                     break;
2857                 case AV_CODEC_ID_H263:
2858                 case AV_CODEC_ID_H263P:
2859                     if(!s->h263_slice_structured)
2860                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2861                     break;
2862                 case AV_CODEC_ID_MPEG2VIDEO:
2863                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
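                    /* fall through: the skip-run check below applies to MPEG-2 as well */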
2864                 case AV_CODEC_ID_MPEG1VIDEO:
2865                     if(s->mb_skip_run) is_gob_start=0;
2866                     break;
2867                 case AV_CODEC_ID_MJPEG:
2868                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2869                     break;
2870                 }
2871
2872                 if(is_gob_start){
2873                     if(s->start_mb_y != mb_y || mb_x!=0){
2874                         write_slice_end(s);
2875
2876                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2877                             ff_mpeg4_init_partitions(s);
2878                         }
2879                     }
2880
2881                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2882                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2883
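                    /* With error_rate set, roughly one packet in
                     * (100 / error_rate) is deliberately dropped by rewinding
                     * the bitstream to the last resync point, simulating
                     * transmission errors. */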
2884                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2885                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2886                         int d = 100 / s->error_rate;
2887                         if(r % d == 0){
2888                             current_packet_size=0;
2889                             s->pb.buf_ptr= s->ptr_lastgob;
2890                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2891                         }
2892                     }
2893
2894                     if (s->avctx->rtp_callback){
2895                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2896                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2897                     }
2898                     update_mb_info(s, 1);
2899
2900                     switch(s->codec_id){
2901                     case AV_CODEC_ID_MPEG4:
2902                         if (CONFIG_MPEG4_ENCODER) {
2903                             ff_mpeg4_encode_video_packet_header(s);
2904                             ff_mpeg4_clean_buffers(s);
2905                         }
2906                     break;
2907                     case AV_CODEC_ID_MPEG1VIDEO:
2908                     case AV_CODEC_ID_MPEG2VIDEO:
2909                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2910                             ff_mpeg1_encode_slice_header(s);
2911                             ff_mpeg1_clean_buffers(s);
2912                         }
2913                     break;
2914                     case AV_CODEC_ID_H263:
2915                     case AV_CODEC_ID_H263P:
2916                         if (CONFIG_H263_ENCODER)
2917                             ff_h263_encode_gob_header(s, mb_y);
2918                     break;
2919                     }
2920
2921                     if(s->flags&CODEC_FLAG_PASS1){
2922                         int bits= put_bits_count(&s->pb);
2923                         s->misc_bits+= bits - s->last_bits;
2924                         s->last_bits= bits;
2925                     }
2926
2927                     s->ptr_lastgob += current_packet_size;
2928                     s->first_slice_line=1;
2929                     s->resync_mb_x=mb_x;
2930                     s->resync_mb_y=mb_y;
2931                 }
2932             }
2933
2934             if(  (s->resync_mb_x   == s->mb_x)
2935                && s->resync_mb_y+1 == s->mb_y){
2936                 s->first_slice_line=0;
2937             }
2938
2939             s->mb_skipped=0;
2940             s->dquant=0; //only for QP_RD
2941
2942             update_mb_info(s, 0);
2943
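            /* Rate-distortion mode decision: if more than one candidate MB
             * type survived motion estimation (or QP_RD is active), each
             * candidate is encoded into scratch PutBitContexts by
             * encode_mb_hq(), which keeps the cheapest result in best_s/dmin;
             * the winning bitstream is copied back into s->pb afterwards. */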
2944             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2945                 int next_block=0;
2946                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2947
2948                 copy_context_before_encode(&backup_s, s, -1);
2949                 backup_s.pb= s->pb;
2950                 best_s.data_partitioning= s->data_partitioning;
2951                 best_s.partitioned_frame= s->partitioned_frame;
2952                 if(s->data_partitioning){
2953                     backup_s.pb2= s->pb2;
2954                     backup_s.tex_pb= s->tex_pb;
2955                 }
2956
2957                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2958                     s->mv_dir = MV_DIR_FORWARD;
2959                     s->mv_type = MV_TYPE_16X16;
2960                     s->mb_intra= 0;
2961                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2962                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2963                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2964                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2965                 }
2966                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2967                     s->mv_dir = MV_DIR_FORWARD;
2968                     s->mv_type = MV_TYPE_FIELD;
2969                     s->mb_intra= 0;
2970                     for(i=0; i<2; i++){
2971                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2972                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2973                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2974                     }
2975                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2976                                  &dmin, &next_block, 0, 0);
2977                 }
2978                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2979                     s->mv_dir = MV_DIR_FORWARD;
2980                     s->mv_type = MV_TYPE_16X16;
2981                     s->mb_intra= 0;
2982                     s->mv[0][0][0] = 0;
2983                     s->mv[0][0][1] = 0;
2984                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2985                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2986                 }
2987                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2988                     s->mv_dir = MV_DIR_FORWARD;
2989                     s->mv_type = MV_TYPE_8X8;
2990                     s->mb_intra= 0;
2991                     for(i=0; i<4; i++){
2992                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2993                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2994                     }
2995                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2996                                  &dmin, &next_block, 0, 0);
2997                 }
2998                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2999                     s->mv_dir = MV_DIR_FORWARD;
3000                     s->mv_type = MV_TYPE_16X16;
3001                     s->mb_intra= 0;
3002                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3003                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3004                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3005                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3006                 }
3007                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3008                     s->mv_dir = MV_DIR_BACKWARD;
3009                     s->mv_type = MV_TYPE_16X16;
3010                     s->mb_intra= 0;
3011                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3012                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3013                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3014                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3015                 }
3016                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3017                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3018                     s->mv_type = MV_TYPE_16X16;
3019                     s->mb_intra= 0;
3020                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3021                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3022                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3023                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3024                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3025                                  &dmin, &next_block, 0, 0);
3026                 }
3027                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3028                     s->mv_dir = MV_DIR_FORWARD;
3029                     s->mv_type = MV_TYPE_FIELD;
3030                     s->mb_intra= 0;
3031                     for(i=0; i<2; i++){
3032                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3033                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3034                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3035                     }
3036                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3037                                  &dmin, &next_block, 0, 0);
3038                 }
3039                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3040                     s->mv_dir = MV_DIR_BACKWARD;
3041                     s->mv_type = MV_TYPE_FIELD;
3042                     s->mb_intra= 0;
3043                     for(i=0; i<2; i++){
3044                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3045                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3046                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3047                     }
3048                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3049                                  &dmin, &next_block, 0, 0);
3050                 }
3051                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3052                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3053                     s->mv_type = MV_TYPE_FIELD;
3054                     s->mb_intra= 0;
3055                     for(dir=0; dir<2; dir++){
3056                         for(i=0; i<2; i++){
3057                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3058                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3059                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3060                         }
3061                     }
3062                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3063                                  &dmin, &next_block, 0, 0);
3064                 }
3065                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3066                     s->mv_dir = 0;
3067                     s->mv_type = MV_TYPE_16X16;
3068                     s->mb_intra= 1;
3069                     s->mv[0][0][0] = 0;
3070                     s->mv[0][0][1] = 0;
3071                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3072                                  &dmin, &next_block, 0, 0);
3073                     if(s->h263_pred || s->h263_aic){
3074                         if(best_s.mb_intra)
3075                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3076                         else
3077                             ff_clean_intra_table_entries(s); //old mode?
3078                     }
3079                 }
3080
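                /* QP_RD: re-encode the best 16x16 candidate with the quantizer
                 * changed by the dquant values below (+-1 and +-2, only +-2
                 * for B frames) and keep whichever qscale yields the lower RD
                 * cost, restoring the DC/AC prediction state whenever a try is
                 * rejected. */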
3081                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3082                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3083                         const int last_qp= backup_s.qscale;
3084                         int qpi, qp, dc[6];
3085                         int16_t ac[6][16];
3086                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3087                         static const int dquant_tab[4]={-1,1,-2,2};
3088                         int storecoefs = s->mb_intra && s->dc_val[0];
3089
3090                         av_assert2(backup_s.dquant == 0);
3091
3092                         //FIXME intra
3093                         s->mv_dir= best_s.mv_dir;
3094                         s->mv_type = MV_TYPE_16X16;
3095                         s->mb_intra= best_s.mb_intra;
3096                         s->mv[0][0][0] = best_s.mv[0][0][0];
3097                         s->mv[0][0][1] = best_s.mv[0][0][1];
3098                         s->mv[1][0][0] = best_s.mv[1][0][0];
3099                         s->mv[1][0][1] = best_s.mv[1][0][1];
3100
3101                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3102                         for(; qpi<4; qpi++){
3103                             int dquant= dquant_tab[qpi];
3104                             qp= last_qp + dquant;
3105                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3106                                 continue;
3107                             backup_s.dquant= dquant;
3108                             if(storecoefs){
3109                                 for(i=0; i<6; i++){
3110                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3111                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3112                                 }
3113                             }
3114
3115                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3116                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3117                             if(best_s.qscale != qp){
3118                                 if(storecoefs){
3119                                     for(i=0; i<6; i++){
3120                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3121                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3122                                     }
3123                                 }
3124                             }
3125                         }
3126                     }
3127                 }
3128                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3129                     int mx= s->b_direct_mv_table[xy][0];
3130                     int my= s->b_direct_mv_table[xy][1];
3131
3132                     backup_s.dquant = 0;
3133                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3134                     s->mb_intra= 0;
3135                     ff_mpeg4_set_direct_mv(s, mx, my);
3136                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3137                                  &dmin, &next_block, mx, my);
3138                 }
3139                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3140                     backup_s.dquant = 0;
3141                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3142                     s->mb_intra= 0;
3143                     ff_mpeg4_set_direct_mv(s, 0, 0);
3144                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3145                                  &dmin, &next_block, 0, 0);
3146                 }
3147                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3148                     int coded=0;
3149                     for(i=0; i<6; i++)
3150                         coded |= s->block_last_index[i];
3151                     if(coded){
3152                         int mx,my;
3153                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3154                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3155                             mx=my=0; //FIXME find the one we actually used
3156                             ff_mpeg4_set_direct_mv(s, mx, my);
3157                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3158                             mx= s->mv[1][0][0];
3159                             my= s->mv[1][0][1];
3160                         }else{
3161                             mx= s->mv[0][0][0];
3162                             my= s->mv[0][0][1];
3163                         }
3164
3165                         s->mv_dir= best_s.mv_dir;
3166                         s->mv_type = best_s.mv_type;
3167                         s->mb_intra= 0;
3168 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3169                         s->mv[0][0][1] = best_s.mv[0][0][1];
3170                         s->mv[1][0][0] = best_s.mv[1][0][0];
3171                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3172                         backup_s.dquant= 0;
3173                         s->skipdct=1;
3174                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3175                                         &dmin, &next_block, mx, my);
3176                         s->skipdct=0;
3177                     }
3178                 }
3179
3180                 s->current_picture.qscale_table[xy] = best_s.qscale;
3181
3182                 copy_context_after_encode(s, &best_s, -1);
3183
3184                 pb_bits_count= put_bits_count(&s->pb);
3185                 flush_put_bits(&s->pb);
3186                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3187                 s->pb= backup_s.pb;
3188
3189                 if(s->data_partitioning){
3190                     pb2_bits_count= put_bits_count(&s->pb2);
3191                     flush_put_bits(&s->pb2);
3192                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3193                     s->pb2= backup_s.pb2;
3194
3195                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3196                     flush_put_bits(&s->tex_pb);
3197                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3198                     s->tex_pb= backup_s.tex_pb;
3199                 }
3200                 s->last_bits= put_bits_count(&s->pb);
3201
3202                 if (CONFIG_H263_ENCODER &&
3203                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3204                     ff_h263_update_motion_val(s);
3205
3206                 if(next_block==0){ //FIXME 16 vs linesize16
3207                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3208                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3209                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3210                 }
3211
3212                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3213                     ff_mpv_decode_mb(s, s->block);
3214             } else {
3215                 int motion_x = 0, motion_y = 0;
3216                 s->mv_type=MV_TYPE_16X16;
3217                 // only one MB-Type possible
3218
3219                 switch(mb_type){
3220                 case CANDIDATE_MB_TYPE_INTRA:
3221                     s->mv_dir = 0;
3222                     s->mb_intra= 1;
3223                     motion_x= s->mv[0][0][0] = 0;
3224                     motion_y= s->mv[0][0][1] = 0;
3225                     break;
3226                 case CANDIDATE_MB_TYPE_INTER:
3227                     s->mv_dir = MV_DIR_FORWARD;
3228                     s->mb_intra= 0;
3229                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3230                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3231                     break;
3232                 case CANDIDATE_MB_TYPE_INTER_I:
3233                     s->mv_dir = MV_DIR_FORWARD;
3234                     s->mv_type = MV_TYPE_FIELD;
3235                     s->mb_intra= 0;
3236                     for(i=0; i<2; i++){
3237                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3238                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3239                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3240                     }
3241                     break;
3242                 case CANDIDATE_MB_TYPE_INTER4V:
3243                     s->mv_dir = MV_DIR_FORWARD;
3244                     s->mv_type = MV_TYPE_8X8;
3245                     s->mb_intra= 0;
3246                     for(i=0; i<4; i++){
3247                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3248                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3249                     }
3250                     break;
3251                 case CANDIDATE_MB_TYPE_DIRECT:
3252                     if (CONFIG_MPEG4_ENCODER) {
3253                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3254                         s->mb_intra= 0;
3255                         motion_x=s->b_direct_mv_table[xy][0];
3256                         motion_y=s->b_direct_mv_table[xy][1];
3257                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3258                     }
3259                     break;
3260                 case CANDIDATE_MB_TYPE_DIRECT0:
3261                     if (CONFIG_MPEG4_ENCODER) {
3262                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3263                         s->mb_intra= 0;
3264                         ff_mpeg4_set_direct_mv(s, 0, 0);
3265                     }
3266                     break;
3267                 case CANDIDATE_MB_TYPE_BIDIR:
3268                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3269                     s->mb_intra= 0;
3270                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3271                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3272                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3273                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3274                     break;
3275                 case CANDIDATE_MB_TYPE_BACKWARD:
3276                     s->mv_dir = MV_DIR_BACKWARD;
3277                     s->mb_intra= 0;
3278                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3279                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3280                     break;
3281                 case CANDIDATE_MB_TYPE_FORWARD:
3282                     s->mv_dir = MV_DIR_FORWARD;
3283                     s->mb_intra= 0;
3284                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3285                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3286                     break;
3287                 case CANDIDATE_MB_TYPE_FORWARD_I:
3288                     s->mv_dir = MV_DIR_FORWARD;
3289                     s->mv_type = MV_TYPE_FIELD;
3290                     s->mb_intra= 0;
3291                     for(i=0; i<2; i++){
3292                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3293                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3294                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3295                     }
3296                     break;
3297                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3298                     s->mv_dir = MV_DIR_BACKWARD;
3299                     s->mv_type = MV_TYPE_FIELD;
3300                     s->mb_intra= 0;
3301                     for(i=0; i<2; i++){
3302                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3303                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3304                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3305                     }
3306                     break;
3307                 case CANDIDATE_MB_TYPE_BIDIR_I:
3308                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3309                     s->mv_type = MV_TYPE_FIELD;
3310                     s->mb_intra= 0;
3311                     for(dir=0; dir<2; dir++){
3312                         for(i=0; i<2; i++){
3313                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3314                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3315                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3316                         }
3317                     }
3318                     break;
3319                 default:
3320                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3321                 }
3322
3323                 encode_mb(s, motion_x, motion_y);
3324
3325                 // RAL: Update last macroblock type
3326                 s->last_mv_dir = s->mv_dir;
3327
3328                 if (CONFIG_H263_ENCODER &&
3329                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3330                     ff_h263_update_motion_val(s);
3331
3332                 ff_mpv_decode_mb(s, s->block);
3333             }
3334
3335             /* zero the MV table entries of intra MBs in I/P/S frames, for direct mode in B frames */
3336             if(s->mb_intra /* && I,P,S_TYPE */){
3337                 s->p_mv_table[xy][0]=0;
3338                 s->p_mv_table[xy][1]=0;
3339             }
3340
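            /* For PSNR reporting, accumulate the per-plane SSE between the
             * source and the reconstructed macroblock; w/h are clipped for MBs
             * that extend beyond the picture edge. */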
3341             if(s->flags&CODEC_FLAG_PSNR){
3342                 int w= 16;
3343                 int h= 16;
3344
3345                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3346                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3347
3348                 s->current_picture.error[0] += sse(
3349                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3350                     s->dest[0], w, h, s->linesize);
3351                 s->current_picture.error[1] += sse(
3352                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3353                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3354                 s->current_picture.error[2] += sse(
3355                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3356                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3357             }
3358             if(s->loop_filter){
3359                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3360                     ff_h263_loop_filter(s);
3361             }
3362             av_dlog(s->avctx, "MB %d %d bits\n",
3363                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3364         }
3365     }
3366
3367     // Not pretty, but the extension header must be written before the slice is flushed, so it has to be here.
3368     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3369         ff_msmpeg4_encode_ext_header(s);
3370
3371     write_slice_end(s);
3372
3373     /* Send the last GOB if RTP */
3374     if (s->avctx->rtp_callback) {
3375         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3376         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3377         /* Call the RTP callback to send the last GOB */
3378         emms_c();
3379         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3380     }
3381
3382     return 0;
3383 }
3384
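/* MERGE() folds the statistics gathered by a slice-context worker back into
 * the main context: the source field is added to the destination and then
 * cleared. */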
3385 #define MERGE(field) dst->field += src->field; src->field=0
3386 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3387     MERGE(me.scene_change_score);
3388     MERGE(me.mc_mb_var_sum_temp);
3389     MERGE(me.mb_var_sum_temp);
3390 }
3391
3392 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3393     int i;
3394
3395     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3396     MERGE(dct_count[1]);
3397     MERGE(mv_bits);
3398     MERGE(i_tex_bits);
3399     MERGE(p_tex_bits);
3400     MERGE(i_count);
3401     MERGE(f_count);
3402     MERGE(b_count);
3403     MERGE(skip_count);
3404     MERGE(misc_bits);
3405     MERGE(er.error_count);
3406     MERGE(padding_bug_score);
3407     MERGE(current_picture.error[0]);
3408     MERGE(current_picture.error[1]);
3409     MERGE(current_picture.error[2]);
3410
3411     if(dst->avctx->noise_reduction){
3412         for(i=0; i<64; i++){
3413             MERGE(dct_error_sum[0][i]);
3414             MERGE(dct_error_sum[1][i]);
3415         }
3416     }
3417
3418     assert(put_bits_count(&src->pb) % 8 == 0);
3419     assert(put_bits_count(&dst->pb) % 8 == 0);
3420     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3421     flush_put_bits(&dst->pb);
3422 }
3423
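/* Pick the quality for the current frame: a pending next_lambda takes
 * precedence, otherwise the rate control is queried unless the quantizer is
 * fixed. With adaptive quantization the per-MB qscale table is additionally
 * cleaned up in a codec specific way. Returns 0 on success, -1 if rate
 * control fails. */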
3424 static int estimate_qp(MpegEncContext *s, int dry_run){
3425     if (s->next_lambda){
3426         s->current_picture_ptr->f->quality =
3427         s->current_picture.f->quality = s->next_lambda;
3428         if(!dry_run) s->next_lambda= 0;
3429     } else if (!s->fixed_qscale) {
3430         s->current_picture_ptr->f->quality =
3431         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3432         if (s->current_picture.f->quality < 0)
3433             return -1;
3434     }
3435
3436     if(s->adaptive_quant){
3437         switch(s->codec_id){
3438         case AV_CODEC_ID_MPEG4:
3439             if (CONFIG_MPEG4_ENCODER)
3440                 ff_clean_mpeg4_qscales(s);
3441             break;
3442         case AV_CODEC_ID_H263:
3443         case AV_CODEC_ID_H263P:
3444         case AV_CODEC_ID_FLV1:
3445             if (CONFIG_H263_ENCODER)
3446                 ff_clean_h263_qscales(s);
3447             break;
3448         default:
3449             ff_init_qscale_tab(s);
3450         }
3451
3452         s->lambda= s->lambda_table[0];
3453         //FIXME broken
3454     }else
3455         s->lambda = s->current_picture.f->quality;
3456     update_qscale(s);
3457     return 0;
3458 }
3459
3460 /* must be called before writing the header */
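/* pp_time is the temporal distance between the two non-B pictures surrounding
 * the current one, pb_time the distance from the previous non-B picture to
 * the current B picture; MPEG-4 direct mode, among others, scales its motion
 * vectors with these. */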
3461 static void set_frame_distances(MpegEncContext * s){
3462     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3463     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3464
3465     if(s->pict_type==AV_PICTURE_TYPE_B){
3466         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3467         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3468     }else{
3469         s->pp_time= s->time - s->last_non_b_time;
3470         s->last_non_b_time= s->time;
3471         assert(s->picture_number==0 || s->pp_time > 0);
3472     }
3473 }
3474
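/**
 * Encode one picture: run motion estimation (including scene change
 * detection), choose f_code/b_code and clamp out-of-range vectors, estimate
 * the quantizer, set up codec specific quant matrices (MJPEG/AMV), write the
 * picture header, and finally run encode_thread() over all slice contexts,
 * merging their statistics afterwards.
 */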
3475 static int encode_picture(MpegEncContext *s, int picture_number)
3476 {
3477     int i, ret;
3478     int bits;
3479     int context_count = s->slice_context_count;
3480
3481     s->picture_number = picture_number;
3482
3483     /* Reset the average MB variance */
3484     s->me.mb_var_sum_temp    =
3485     s->me.mc_mb_var_sum_temp = 0;
3486
3487     /* we need to initialize some time vars before we can encode b-frames */
3488     // RAL: Condition added for MPEG1VIDEO
3489     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3490         set_frame_distances(s);
3491     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3492         ff_set_mpeg4_time(s);
3493
3494     s->me.scene_change_score=0;
3495
3496 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3497
3498     if(s->pict_type==AV_PICTURE_TYPE_I){
3499         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3500         else                        s->no_rounding=0;
3501     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3502         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3503             s->no_rounding ^= 1;
3504     }
3505
3506     if(s->flags & CODEC_FLAG_PASS2){
3507         if (estimate_qp(s,1) < 0)
3508             return -1;
3509         ff_get_2pass_fcode(s);
3510     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3511         if(s->pict_type==AV_PICTURE_TYPE_B)
3512             s->lambda= s->last_lambda_for[s->pict_type];
3513         else
3514             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3515         update_qscale(s);
3516     }
3517
3518     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3519         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3520         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3521         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3522         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3523     }
3524
3525     s->mb_intra=0; //for the rate distortion & bit compare functions
3526     for(i=1; i<context_count; i++){
3527         ret = ff_update_duplicate_context(s->thread_context[i], s);
3528         if (ret < 0)
3529             return ret;
3530     }
3531
3532     if(ff_init_me(s)<0)
3533         return -1;
3534
3535     /* Estimate motion for every MB */
3536     if(s->pict_type != AV_PICTURE_TYPE_I){
3537         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3538         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3539         if (s->pict_type != AV_PICTURE_TYPE_B) {
3540             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3541                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3542             }
3543         }
3544
3545         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3546     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3547         /* I-Frame */
3548         for(i=0; i<s->mb_stride*s->mb_height; i++)
3549             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3550
3551         if(!s->fixed_qscale){
3552             /* finding spatial complexity for I-frame rate control */
3553             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3554         }
3555     }
3556     for(i=1; i<context_count; i++){
3557         merge_context_after_me(s, s->thread_context[i]);
3558     }
3559     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3560     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3561     emms_c();
3562
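    /* If motion estimation reports a scene change for a P frame, re-type it
     * as an I frame and mark every MB as intra. */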
3563     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3564         s->pict_type= AV_PICTURE_TYPE_I;
3565         for(i=0; i<s->mb_stride*s->mb_height; i++)
3566             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3567         if(s->msmpeg4_version >= 3)
3568             s->no_rounding=1;
3569         av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3570                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3571     }
3572
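    /* Unless unrestricted MVs are used, derive f_code/b_code from the motion
     * vector statistics and clamp any vectors that fall outside the coded
     * range. */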
3573     if(!s->umvplus){
3574         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3575             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3576
3577             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3578                 int a,b;
3579                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3580                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3581                 s->f_code= FFMAX3(s->f_code, a, b);
3582             }
3583
3584             ff_fix_long_p_mvs(s);
3585             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3586             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3587                 int j;
3588                 for(i=0; i<2; i++){
3589                     for(j=0; j<2; j++)
3590                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3591                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3592                 }
3593             }
3594         }
3595
3596         if(s->pict_type==AV_PICTURE_TYPE_B){
3597             int a, b;
3598
3599             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3600             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3601             s->f_code = FFMAX(a, b);
3602
3603             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3604             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3605             s->b_code = FFMAX(a, b);
3606
3607             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3608             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3609             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3610             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3611             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3612                 int dir, j;
3613                 for(dir=0; dir<2; dir++){
3614                     for(i=0; i<2; i++){
3615                         for(j=0; j<2; j++){
3616                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3617                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3618                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3619                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3620                         }
3621                     }
3622                 }
3623             }
3624         }
3625     }
3626
3627     if (estimate_qp(s, 0) < 0)
3628         return -1;
3629
3630     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3631         s->qscale= 3; //reduce clipping problems
3632
3633     if (s->out_format == FMT_MJPEG) {
3634         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3635         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3636
3637         if (s->avctx->intra_matrix) {
3638             chroma_matrix =
3639             luma_matrix = s->avctx->intra_matrix;
3640         }
3641         if (s->avctx->chroma_intra_matrix)
3642             chroma_matrix = s->avctx->chroma_intra_matrix;
3643
3644         /* for mjpeg, we do include qscale in the matrix */
3645         for(i=1;i<64;i++){
3646             int j = s->idsp.idct_permutation[i];
3647
3648             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3649             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3650         }
3651         s->y_dc_scale_table=
3652         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3653         s->chroma_intra_matrix[0] =
3654         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3655         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3656                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3657         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3658                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3659         s->qscale= 8;
3660     }
3661     if(s->codec_id == AV_CODEC_ID_AMV){
3662         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3663         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3664         for(i=1;i<64;i++){
3665             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3666
3667             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3668             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3669         }
3670         s->y_dc_scale_table= y;
3671         s->c_dc_scale_table= c;
3672         s->intra_matrix[0] = 13;
3673         s->chroma_intra_matrix[0] = 14;
3674         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3675                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3676         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3677                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3678         s->qscale= 8;
3679     }
3680
3681     //FIXME var duplication
3682     s->current_picture_ptr->f->key_frame =
3683     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3684     s->current_picture_ptr->f->pict_type =
3685     s->current_picture.f->pict_type = s->pict_type;
3686
3687     if (s->current_picture.f->key_frame)
3688         s->picture_in_gop_number=0;
3689
3690     s->mb_x = s->mb_y = 0;
3691     s->last_bits= put_bits_count(&s->pb);
3692     switch(s->out_format) {
3693     case FMT_MJPEG:
3694         if (CONFIG_MJPEG_ENCODER)
3695             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3696                                            s->intra_matrix, s->chroma_intra_matrix);
3697         break;
3698     case FMT_H261:
3699         if (CONFIG_H261_ENCODER)
3700             ff_h261_encode_picture_header(s, picture_number);
3701         break;
3702     case FMT_H263:
3703         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3704             ff_wmv2_encode_picture_header(s, picture_number);
3705         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3706             ff_msmpeg4_encode_picture_header(s, picture_number);
3707         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3708             ff_mpeg4_encode_picture_header(s, picture_number);
3709         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3710             ff_rv10_encode_picture_header(s, picture_number);
3711         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3712             ff_rv20_encode_picture_header(s, picture_number);
3713         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3714             ff_flv_encode_picture_header(s, picture_number);
3715         else if (CONFIG_H263_ENCODER)
3716             ff_h263_encode_picture_header(s, picture_number);
3717         break;
3718     case FMT_MPEG1:
3719         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3720             ff_mpeg1_encode_picture_header(s, picture_number);
3721         break;
3722     default:
3723         av_assert0(0);
3724     }
3725     bits= put_bits_count(&s->pb);
3726     s->header_bits= bits - s->last_bits;
3727
3728     for(i=1; i<context_count; i++){
3729         update_duplicate_context_after_me(s->thread_context[i], s);
3730     }
3731     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3732     for(i=1; i<context_count; i++){
3733         merge_context_after_encode(s, s->thread_context[i]);
3734     }
3735     emms_c();
3736     return 0;
3737 }
3738
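/* DCT-domain noise reduction: count the processed blocks in dct_count and
 * accumulate each coefficient's magnitude in dct_error_sum, then shrink every
 * nonzero coefficient towards zero by the corresponding dct_offset (clamping
 * at zero), which suppresses small, noisy coefficients before quantization. */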
3739 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3740     const int intra= s->mb_intra;
3741     int i;
3742
3743     s->dct_count[intra]++;
3744
3745     for(i=0; i<64; i++){
3746         int level= block[i];
3747
3748         if(level){
3749             if(level>0){
3750                 s->dct_error_sum[intra][i] += level;
3751                 level -= s->dct_offset[intra][i];
3752                 if(level<0) level=0;
3753             }else{
3754                 s->dct_error_sum[intra][i] -= level;
3755                 level += s->dct_offset[intra][i];
3756                 if(level>0) level=0;
3757             }
3758             block[i]= level;
3759         }
3760     }
3761 }
3762
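/* Trellis quantization: after the forward DCT, each coefficient is considered
 * in scan order with its two nearest quantization levels, and a small dynamic
 * program over the surviving "previous coded coefficient" positions picks the
 * run/level decisions minimizing  distortion + lambda * bits(run, level),
 * including the cost of ending the block at each position. Returns the index
 * of the last nonzero coefficient. */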
3763 static int dct_quantize_trellis_c(MpegEncContext *s,
3764                                   int16_t *block, int n,
3765                                   int qscale, int *overflow){
3766     const int *qmat;
3767     const uint8_t *scantable= s->intra_scantable.scantable;
3768     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3769     int max=0;
3770     unsigned int threshold1, threshold2;
3771     int bias=0;
3772     int run_tab[65];
3773     int level_tab[65];
3774     int score_tab[65];
3775     int survivor[65];
3776     int survivor_count;
3777     int last_run=0;
3778     int last_level=0;
3779     int last_score= 0;
3780     int last_i;
3781     int coeff[2][64];
3782     int coeff_count[64];
3783     int qmul, qadd, start_i, last_non_zero, i, dc;
3784     const int esc_length= s->ac_esc_length;
3785     uint8_t * length;
3786     uint8_t * last_length;
3787     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3788
3789     s->fdsp.fdct(block);
3790
3791     if(s->dct_error_sum)
3792         s->denoise_dct(s, block);
3793     qmul= qscale*16;
3794     qadd= ((qscale-1)|1)*8;
3795
3796     if (s->mb_intra) {
3797         int q;
3798         if (!s->h263_aic) {
3799             if (n < 4)
3800                 q = s->y_dc_scale;
3801             else
3802                 q = s->c_dc_scale;
3803             q = q << 3;
3804         } else{
3805             /* For AIC we skip quant/dequant of INTRADC */
3806             q = 1 << 3;
3807             qadd=0;
3808         }
3809
3810         /* note: block[0] is assumed to be positive */
3811         block[0] = (block[0] + (q >> 1)) / q;
3812         start_i = 1;
3813         last_non_zero = 0;
3814         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3815         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3816             bias= 1<<(QMAT_SHIFT-1);
3817         length     = s->intra_ac_vlc_length;
3818         last_length= s->intra_ac_vlc_last_length;
3819     } else {
3820         start_i = 0;
3821         last_non_zero = -1;
3822         qmat = s->q_inter_matrix[qscale];
3823         length     = s->inter_ac_vlc_length;
3824         last_length= s->inter_ac_vlc_last_length;
3825     }
3826     last_i= start_i;
3827
3828     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3829     threshold2= (threshold1<<1);
3830
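    /* The unsigned compare (level + threshold1) > threshold2 below is
     * equivalent to |level| > threshold1, i.e. (bias + |level|) >> QMAT_SHIFT
     * is at least 1, so it selects exactly the coefficients that quantize to a
     * nonzero value. */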
3831     for(i=63; i>=start_i; i--) {
3832         const int j = scantable[i];
3833         int level = block[j] * qmat[j];
3834
3835         if(((unsigned)(level+threshold1))>threshold2){
3836             last_non_zero = i;
3837             break;
3838         }
3839     }
3840
3841     for(i=start_i; i<=last_non_zero; i++) {
3842         const int j = scantable[i];
3843         int level = block[j] * qmat[j];
3844
3845 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3846 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3847         if(((unsigned)(level+threshold1))>threshold2){
3848             if(level>0){
3849                 level= (bias + level)>>QMAT_SHIFT;
3850                 coeff[0][i]= level;
3851                 coeff[1][i]= level-1;
3852 //                coeff[2][k]= level-2;
3853             }else{
3854                 level= (bias - level)>>QMAT_SHIFT;
3855                 coeff[0][i]= -level;
3856                 coeff[1][i]= -level+1;
3857 //                coeff[2][k]= -level+2;
3858             }
3859             coeff_count[i]= FFMIN(level, 2);
3860             av_assert2(coeff_count[i]);
3861             max |=level;
3862         }else{
3863             coeff[0][i]= (level>>31)|1;
3864             coeff_count[i]= 1;
3865         }
3866     }
3867
3868     *overflow= s->max_qcoeff < max; //overflow might have happened
3869
3870     if(last_non_zero < start_i){
3871         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3872         return last_non_zero;
3873     }
3874
3875     score_tab[start_i]= 0;
3876     survivor[0]= start_i;
3877     survivor_count= 1;
3878
3879     for(i=start_i; i<=last_non_zero; i++){
3880         int level_index, j, zero_distortion;
3881         int dct_coeff= FFABS(block[ scantable[i] ]);
3882         int best_score=256*256*256*120;
3883
3884         if (s->fdsp.fdct == ff_fdct_ifast)
3885             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3886         zero_distortion= dct_coeff*dct_coeff;
3887
3888         for(level_index=0; level_index < coeff_count[i]; level_index++){
3889             int distortion;
3890             int level= coeff[level_index][i];
3891             const int alevel= FFABS(level);
3892             int unquant_coeff;
3893
3894             av_assert2(level);
3895
3896             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3897                 unquant_coeff= alevel*qmul + qadd;
3898             }else{ //MPEG1
3899                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3900                 if(s->mb_intra){
3901                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3902                         unquant_coeff =   (unquant_coeff - 1) | 1;
3903                 }else{
3904                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3905                         unquant_coeff =   (unquant_coeff - 1) | 1;
3906                 }
3907                 unquant_coeff<<= 3;
3908             }
3909
3910             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3911             level+=64;
3912             if((level&(~127)) == 0){
3913                 for(j=survivor_count-1; j>=0; j--){
3914                     int run= i - survivor[j];
3915                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3916                     score += score_tab[i-run];
3917
3918                     if(score < best_score){
3919                         best_score= score;
3920                         run_tab[i+1]= run;
3921                         level_tab[i+1]= level-64;
3922                     }
3923                 }
3924
3925                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3926                     for(j=survivor_count-1; j>=0; j--){
3927                         int run= i - survivor[j];
3928                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3929                         score += score_tab[i-run];
3930                         if(score < last_score){
3931                             last_score= score;
3932                             last_run= run;
3933                             last_level= level-64;
3934                             last_i= i+1;
3935                         }
3936                     }
3937                 }
3938             }else{
3939                 distortion += esc_length*lambda;
3940                 for(j=survivor_count-1; j>=0; j--){
3941                     int run= i - survivor[j];
3942                     int score= distortion + score_tab[i-run];
3943
3944                     if(score < best_score){
3945                         best_score= score;
3946                         run_tab[i+1]= run;
3947                         level_tab[i+1]= level-64;
3948                     }
3949                 }
3950
3951                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3952                   for(j=survivor_count-1; j>=0; j--){
3953                         int run= i - survivor[j];
3954                         int score= distortion + score_tab[i-run];
3955                         if(score < last_score){
3956                             last_score= score;
3957                             last_run= run;
3958                             last_level= level-64;
3959                             last_i= i+1;
3960                         }
3961                     }
3962                 }
3963             }
3964         }
3965
3966         score_tab[i+1]= best_score;
3967
3968                 //Note: MPEG-4 has a VLC code that is 1 bit shorter than another one with a shorter run and the same level
3969         if(last_non_zero <= 27){
3970             for(; survivor_count; survivor_count--){
3971                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3972                     break;
3973             }
3974         }else{
3975             for(; survivor_count; survivor_count--){
3976                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3977                     break;
3978             }
3979         }
3980
3981         survivor[ survivor_count++ ]= i+1;
3982     }
3983
3984     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3985         last_score= 256*256*256*120;
3986         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3987             int score= score_tab[i];
3988             if(i) score += lambda*2; //FIXME could be more exact
3989
3990             if(score < last_score){
3991                 last_score= score;
3992                 last_i= i;
3993                 last_level= level_tab[i];
3994                 last_run= run_tab[i];
3995             }
3996         }
3997     }
3998
3999     s->coded_score[n] = last_score;
4000
4001     dc= FFABS(block[0]);
4002     last_non_zero= last_i - 1;
4003     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4004
4005     if(last_non_zero < start_i)
4006         return last_non_zero;
4007
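         /* Special case: only the coefficient at scan position 0 is coded. Pick the level
          * whose dequantized value best matches the original DC under distortion +
          * lambda * rate, or drop the block (return -1) if zero is cheapest. */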
4008     if(last_non_zero == 0 && start_i == 0){
4009         int best_level= 0;
4010         int best_score= dc * dc;
4011
4012         for(i=0; i<coeff_count[0]; i++){
4013             int level= coeff[i][0];
4014             int alevel= FFABS(level);
4015             int unquant_coeff, score, distortion;
4016
4017             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4018                 unquant_coeff= (alevel*qmul + qadd)>>3;
4019             }else{ //MPEG1
4020                 unquant_coeff = (((alevel << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
4021                 unquant_coeff = (unquant_coeff - 1) | 1;
4022             }
4023             unquant_coeff = (unquant_coeff + 4) >> 3;
4024             unquant_coeff<<= 3 + 3;
4025
4026             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4027             level+=64;
4028             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4029             else                    score= distortion + esc_length*lambda;
4030
4031             if(score < best_score){
4032                 best_score= score;
4033                 best_level= level - 64;
4034             }
4035         }
4036         block[0]= best_level;
4037         s->coded_score[n] = best_score - dc*dc;
4038         if(best_level == 0) return -1;
4039         else                return last_non_zero;
4040     }
4041
4042     i= last_i;
4043     av_assert2(last_level);
4044
4045     block[ perm_scantable[last_non_zero] ]= last_level;
4046     i -= last_run + 1;
4047
4048     for(; i>start_i; i -= run_tab[i] + 1){
4049         block[ perm_scantable[i-1] ]= level_tab[i];
4050     }
4051
4052     return last_non_zero;
4053 }
4054
4055 //#define REFINE_STATS 1
4056 static int16_t basis[64][64];
4057
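     /* Precompute the 64 8x8 DCT basis functions, scaled by 1 << BASIS_SHIFT and indexed
      * by the IDCT-permuted coefficient number, so that try_8x8basis()/add_8x8basis() can
      * work directly on coefficients in block[] order. */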
4058 static void build_basis(uint8_t *perm){
4059     int i, j, x, y;
4060     emms_c();
4061     for(i=0; i<8; i++){
4062         for(j=0; j<8; j++){
4063             for(y=0; y<8; y++){
4064                 for(x=0; x<8; x++){
4065                     double s= 0.25*(1<<BASIS_SHIFT);
4066                     int index= 8*i + j;
4067                     int perm_index= perm[index];
4068                     if(i==0) s*= sqrt(0.5);
4069                     if(j==0) s*= sqrt(0.5);
4070                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4071                 }
4072             }
4073         }
4074     }
4075 }
4076
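     /* Noise-shaping refinement of an already quantized block: repeatedly try changing
      * each coefficient by +-1, score each change as (VLC bit-cost delta) * lambda plus
      * the change in weighted reconstruction error from try_8x8basis(), apply the single
      * best improvement and iterate until no change helps. Returns the new last nonzero
      * index. */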
4077 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4078                         int16_t *block, int16_t *weight, int16_t *orig,
4079                         int n, int qscale){
4080     int16_t rem[64];
4081     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4082     const uint8_t *scantable= s->intra_scantable.scantable;
4083     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4084 //    unsigned int threshold1, threshold2;
4085 //    int bias=0;
4086     int run_tab[65];
4087     int prev_run=0;
4088     int prev_level=0;
4089     int qmul, qadd, start_i, last_non_zero, i, dc;
4090     uint8_t * length;
4091     uint8_t * last_length;
4092     int lambda;
4093     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4094 #ifdef REFINE_STATS
4095 static int count=0;
4096 static int after_last=0;
4097 static int to_zero=0;
4098 static int from_zero=0;
4099 static int raise=0;
4100 static int lower=0;
4101 static int messed_sign=0;
4102 #endif
4103
4104     if(basis[0][0] == 0)
4105         build_basis(s->idsp.idct_permutation);
4106
4107     qmul= qscale*2;
4108     qadd= (qscale-1)|1;
4109     if (s->mb_intra) {
4110         if (!s->h263_aic) {
4111             if (n < 4)
4112                 q = s->y_dc_scale;
4113             else
4114                 q = s->c_dc_scale;
4115         } else{
4116             /* For AIC we skip quant/dequant of INTRADC */
4117             q = 1;
4118             qadd=0;
4119         }
4120         q <<= RECON_SHIFT-3;
4121         /* note: block[0] is assumed to be positive */
4122         dc= block[0]*q;
4123 //        block[0] = (block[0] + (q >> 1)) / q;
4124         start_i = 1;
4125 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4126 //            bias= 1<<(QMAT_SHIFT-1);
4127         length     = s->intra_ac_vlc_length;
4128         last_length= s->intra_ac_vlc_last_length;
4129     } else {
4130         dc= 0;
4131         start_i = 0;
4132         length     = s->inter_ac_vlc_length;
4133         last_length= s->inter_ac_vlc_last_length;
4134     }
4135     last_non_zero = s->block_last_index[n];
4136
4137 #ifdef REFINE_STATS
4138 {START_TIMER
4139 #endif
4140     dc += (1<<(RECON_SHIFT-1));
4141     for(i=0; i<64; i++){
4142         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
4143     }
4144 #ifdef REFINE_STATS
4145 STOP_TIMER("memset rem[]")}
4146 #endif
4147     sum=0;
4148     for(i=0; i<64; i++){
4149         int one= 36;
4150         int qns=4;
4151         int w;
4152
4153         w= FFABS(weight[i]) + qns*one;
4154         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4155
4156         weight[i] = w;
4157 //        w=weight[i] = (63*qns + (w/2)) / w;
4158
4159         av_assert2(w>0);
4160         av_assert2(w<(1<<6));
4161         sum += w*w;
4162     }
4163     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4164 #ifdef REFINE_STATS
4165 {START_TIMER
4166 #endif
4167     run=0;
4168     rle_index=0;
4169     for(i=start_i; i<=last_non_zero; i++){
4170         int j= perm_scantable[i];
4171         const int level= block[j];
4172         int coeff;
4173
4174         if(level){
4175             if(level<0) coeff= qmul*level - qadd;
4176             else        coeff= qmul*level + qadd;
4177             run_tab[rle_index++]=run;
4178             run=0;
4179
4180             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4181         }else{
4182             run++;
4183         }
4184     }
4185 #ifdef REFINE_STATS
4186 if(last_non_zero>0){
4187 STOP_TIMER("init rem[]")
4188 }
4189 }
4190
4191 {START_TIMER
4192 #endif
4193     for(;;){
4194         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4195         int best_coeff=0;
4196         int best_change=0;
4197         int run2, best_unquant_change=0, analyze_gradient;
4198 #ifdef REFINE_STATS
4199 {START_TIMER
4200 #endif
4201         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4202
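             /* When enabled, d1[] is filled with the DCT of the weighted residual; up to
              * scaling this is the gradient of the weighted error w.r.t. each coefficient
              * and is used further down to avoid introducing a +-1 coefficient that would
              * increase that error. */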
4203         if(analyze_gradient){
4204 #ifdef REFINE_STATS
4205 {START_TIMER
4206 #endif
4207             for(i=0; i<64; i++){
4208                 int w= weight[i];
4209
4210                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4211             }
4212 #ifdef REFINE_STATS
4213 STOP_TIMER("rem*w*w")}
4214 {START_TIMER
4215 #endif
4216             s->fdsp.fdct(d1);
4217 #ifdef REFINE_STATS
4218 STOP_TIMER("dct")}
4219 #endif
4220         }
4221
4222         if(start_i){
4223             const int level= block[0];
4224             int change, old_coeff;
4225
4226             av_assert2(s->mb_intra);
4227
4228             old_coeff= q*level;
4229
4230             for(change=-1; change<=1; change+=2){
4231                 int new_level= level + change;
4232                 int score, new_coeff;
4233
4234                 new_coeff= q*new_level;
4235                 if(new_coeff >= 2048 || new_coeff < 0)
4236                     continue;
4237
4238                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4239                                                   new_coeff - old_coeff);
4240                 if(score<best_score){
4241                     best_score= score;
4242                     best_coeff= 0;
4243                     best_change= change;
4244                     best_unquant_change= new_coeff - old_coeff;
4245                 }
4246             }
4247         }
4248
4249         run=0;
4250         rle_index=0;
4251         run2= run_tab[rle_index++];
4252         prev_level=0;
4253         prev_run=0;
4254
4255         for(i=start_i; i<64; i++){
4256             int j= perm_scantable[i];
4257             const int level= block[j];
4258             int change, old_coeff;
4259
4260             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4261                 break;
4262
4263             if(level){
4264                 if(level<0) old_coeff= qmul*level - qadd;
4265                 else        old_coeff= qmul*level + qadd;
4266                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4267             }else{
4268                 old_coeff=0;
4269                 run2--;
4270                 av_assert2(run2>=0 || i >= last_non_zero );
4271             }
4272
4273             for(change=-1; change<=1; change+=2){
4274                 int new_level= level + change;
4275                 int score, new_coeff, unquant_change;
4276
4277                 score=0;
4278                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4279                    continue;
4280
4281                 if(new_level){
4282                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4283                     else            new_coeff= qmul*new_level + qadd;
4284                     if(new_coeff >= 2048 || new_coeff <= -2048)
4285                         continue;
4286                     //FIXME check for overflow
4287
4288                     if(level){
4289                         if(level < 63 && level > -63){
4290                             if(i < last_non_zero)
4291                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4292                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4293                             else
4294                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4295                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4296                         }
4297                     }else{
4298                         av_assert2(FFABS(new_level)==1);
4299
4300                         if(analyze_gradient){
4301                             int g= d1[ scantable[i] ];
4302                             if(g && (g^new_level) >= 0)
4303                                 continue;
4304                         }
4305
4306                         if(i < last_non_zero){
4307                             int next_i= i + run2 + 1;
4308                             int next_level= block[ perm_scantable[next_i] ] + 64;
4309
4310                             if(next_level&(~127))
4311                                 next_level= 0;
4312
4313                             if(next_i < last_non_zero)
4314                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4315                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4316                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4317                             else
4318                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4319                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4320                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4321                         }else{
4322                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4323                             if(prev_level){
4324                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4325                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4326                             }
4327                         }
4328                     }
4329                 }else{
4330                     new_coeff=0;
4331                     av_assert2(FFABS(level)==1);
4332
4333                     if(i < last_non_zero){
4334                         int next_i= i + run2 + 1;
4335                         int next_level= block[ perm_scantable[next_i] ] + 64;
4336
4337                         if(next_level&(~127))
4338                             next_level= 0;
4339
4340                         if(next_i < last_non_zero)
4341                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4342                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4343                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4344                         else
4345                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4346                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4347                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4348                     }else{
4349                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4350                         if(prev_level){
4351                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4352                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4353                         }
4354                     }
4355                 }
4356
4357                 score *= lambda;
4358
4359                 unquant_change= new_coeff - old_coeff;
4360                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4361
4362                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4363                                                    unquant_change);
4364                 if(score<best_score){
4365                     best_score= score;
4366                     best_coeff= i;
4367                     best_change= change;
4368                     best_unquant_change= unquant_change;
4369                 }
4370             }
4371             if(level){
4372                 prev_level= level + 64;
4373                 if(prev_level&(~127))
4374                     prev_level= 0;
4375                 prev_run= run;
4376                 run=0;
4377             }else{
4378                 run++;
4379             }
4380         }
4381 #ifdef REFINE_STATS
4382 STOP_TIMER("iterative step")}
4383 #endif
4384
4385         if(best_change){
4386             int j= perm_scantable[ best_coeff ];
4387
4388             block[j] += best_change;
4389
4390             if(best_coeff > last_non_zero){
4391                 last_non_zero= best_coeff;
4392                 av_assert2(block[j]);
4393 #ifdef REFINE_STATS
4394 after_last++;
4395 #endif
4396             }else{
4397 #ifdef REFINE_STATS
4398 if(block[j]){
4399     if(block[j] - best_change){
4400         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4401             raise++;
4402         }else{
4403             lower++;
4404         }
4405     }else{
4406         from_zero++;
4407     }
4408 }else{
4409     to_zero++;
4410 }
4411 #endif
4412                 for(; last_non_zero>=start_i; last_non_zero--){
4413                     if(block[perm_scantable[last_non_zero]])
4414                         break;
4415                 }
4416             }
4417 #ifdef REFINE_STATS
4418 count++;
4419 if(256*256*256*64 % count == 0){
4420     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4421 }
4422 #endif
4423             run=0;
4424             rle_index=0;
4425             for(i=start_i; i<=last_non_zero; i++){
4426                 int j= perm_scantable[i];
4427                 const int level= block[j];
4428
4429                  if(level){
4430                      run_tab[rle_index++]=run;
4431                      run=0;
4432                  }else{
4433                      run++;
4434                  }
4435             }
4436
4437             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4438         }else{
4439             break;
4440         }
4441     }
4442 #ifdef REFINE_STATS
4443 if(last_non_zero>0){
4444 STOP_TIMER("iterative search")
4445 }
4446 }
4447 #endif
4448
4449     return last_non_zero;
4450 }
4451
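     /* Plain (non-trellis) quantization of one 8x8 block: forward DCT, optional DCT-domain
      * denoising, then dead-zone quantization with the per-qscale matrix and rounding bias.
      * Returns the index of the last nonzero coefficient; *overflow is set if a level
      * exceeds s->max_qcoeff. */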
4452 int ff_dct_quantize_c(MpegEncContext *s,
4453                         int16_t *block, int n,
4454                         int qscale, int *overflow)
4455 {
4456     int i, j, level, last_non_zero, q, start_i;
4457     const int *qmat;
4458     const uint8_t *scantable= s->intra_scantable.scantable;
4459     int bias;
4460     int max=0;
4461     unsigned int threshold1, threshold2;
4462
4463     s->fdsp.fdct(block);
4464
4465     if(s->dct_error_sum)
4466         s->denoise_dct(s, block);
4467
4468     if (s->mb_intra) {
4469         if (!s->h263_aic) {
4470             if (n < 4)
4471                 q = s->y_dc_scale;
4472             else
4473                 q = s->c_dc_scale;
4474             q = q << 3;
4475         } else
4476             /* For AIC we skip quant/dequant of INTRADC */
4477             q = 1 << 3;
4478
4479         /* note: block[0] is assumed to be positive */
4480         block[0] = (block[0] + (q >> 1)) / q;
4481         start_i = 1;
4482         last_non_zero = 0;
4483         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4484         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4485     } else {
4486         start_i = 0;
4487         last_non_zero = -1;
4488         qmat = s->q_inter_matrix[qscale];
4489         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4490     }
4491     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4492     threshold2= (threshold1<<1);
4493     for(i=63;i>=start_i;i--) {
4494         j = scantable[i];
4495         level = block[j] * qmat[j];
4496
4497         if(((unsigned)(level+threshold1))>threshold2){
4498             last_non_zero = i;
4499             break;
4500         }else{
4501             block[j]=0;
4502         }
4503     }
4504     for(i=start_i; i<=last_non_zero; i++) {
4505         j = scantable[i];
4506         level = block[j] * qmat[j];
4507
4508 //        if(   bias+level >= (1<<QMAT_SHIFT)
4509 //           || bias-level >= (1<<QMAT_SHIFT)){
4510         if(((unsigned)(level+threshold1))>threshold2){
4511             if(level>0){
4512                 level= (bias + level)>>QMAT_SHIFT;
4513                 block[j]= level;
4514             }else{
4515                 level= (bias - level)>>QMAT_SHIFT;
4516                 block[j]= -level;
4517             }
4518             max |=level;
4519         }else{
4520             block[j]=0;
4521         }
4522     }
4523     *overflow= s->max_qcoeff < max; //overflow might have happened
4524
4525     /* we need this permutation so that the IDCT is fed correctly; only the nonzero elements are permuted */
4526     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4527         ff_block_permute(block, s->idsp.idct_permutation,
4528                          scantable, last_non_zero);
4529
4530     return last_non_zero;
4531 }
4532
4533 #define OFFSET(x) offsetof(MpegEncContext, x)
4534 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4535 static const AVOption h263_options[] = {
4536     { "obmc",         "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4537     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4538     { "mb_info",      "Emit macroblock info for RFC 2190 packetization; the parameter value is the maximum payload size.", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4539     FF_MPV_COMMON_OPTS
4540     { NULL },
4541 };
4542
4543 static const AVClass h263_class = {
4544     .class_name = "H.263 encoder",
4545     .item_name  = av_default_item_name,
4546     .option     = h263_options,
4547     .version    = LIBAVUTIL_VERSION_INT,
4548 };
4549
4550 AVCodec ff_h263_encoder = {
4551     .name           = "h263",
4552     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4553     .type           = AVMEDIA_TYPE_VIDEO,
4554     .id             = AV_CODEC_ID_H263,
4555     .priv_data_size = sizeof(MpegEncContext),
4556     .init           = ff_mpv_encode_init,
4557     .encode2        = ff_mpv_encode_picture,
4558     .close          = ff_mpv_encode_end,
4559     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4560     .priv_class     = &h263_class,
4561 };
4562
4563 static const AVOption h263p_options[] = {
4564     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4565     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4566     { "obmc",       "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4567     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4568     FF_MPV_COMMON_OPTS
4569     { NULL },
4570 };
4571 static const AVClass h263p_class = {
4572     .class_name = "H.263p encoder",
4573     .item_name  = av_default_item_name,
4574     .option     = h263p_options,
4575     .version    = LIBAVUTIL_VERSION_INT,
4576 };
4577
4578 AVCodec ff_h263p_encoder = {
4579     .name           = "h263p",
4580     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4581     .type           = AVMEDIA_TYPE_VIDEO,
4582     .id             = AV_CODEC_ID_H263P,
4583     .priv_data_size = sizeof(MpegEncContext),
4584     .init           = ff_mpv_encode_init,
4585     .encode2        = ff_mpv_encode_picture,
4586     .close          = ff_mpv_encode_end,
4587     .capabilities   = CODEC_CAP_SLICE_THREADS,
4588     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4589     .priv_class     = &h263p_class,
4590 };
4591
4592 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4593
4594 AVCodec ff_msmpeg4v2_encoder = {
4595     .name           = "msmpeg4v2",
4596     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4597     .type           = AVMEDIA_TYPE_VIDEO,
4598     .id             = AV_CODEC_ID_MSMPEG4V2,
4599     .priv_data_size = sizeof(MpegEncContext),
4600     .init           = ff_mpv_encode_init,
4601     .encode2        = ff_mpv_encode_picture,
4602     .close          = ff_mpv_encode_end,
4603     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4604     .priv_class     = &msmpeg4v2_class,
4605 };
4606
4607 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4608
4609 AVCodec ff_msmpeg4v3_encoder = {
4610     .name           = "msmpeg4",
4611     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4612     .type           = AVMEDIA_TYPE_VIDEO,
4613     .id             = AV_CODEC_ID_MSMPEG4V3,
4614     .priv_data_size = sizeof(MpegEncContext),
4615     .init           = ff_mpv_encode_init,
4616     .encode2        = ff_mpv_encode_picture,
4617     .close          = ff_mpv_encode_end,
4618     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4619     .priv_class     = &msmpeg4v3_class,
4620 };
4621
4622 FF_MPV_GENERIC_CLASS(wmv1)
4623
4624 AVCodec ff_wmv1_encoder = {
4625     .name           = "wmv1",
4626     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4627     .type           = AVMEDIA_TYPE_VIDEO,
4628     .id             = AV_CODEC_ID_WMV1,
4629     .priv_data_size = sizeof(MpegEncContext),
4630     .init           = ff_mpv_encode_init,
4631     .encode2        = ff_mpv_encode_picture,
4632     .close          = ff_mpv_encode_end,
4633     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4634     .priv_class     = &wmv1_class,
4635 };