git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "idctdsp.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "h261.h"
  44 #include "h263.h"
  45 #include "mjpegenc_common.h"
  46 #include "mathops.h"
  47 #include "mpegutils.h"
  48 #include "mjpegenc.h"
  49 #include "msmpeg4.h"
  50 #include "pixblockdsp.h"
  51 #include "qpeldsp.h"
  52 #include "faandct.h"
  53 #include "thread.h"
  54 #include "aandcttab.h"
  55 #include "flv.h"
  56 #include "mpeg4video.h"
  57 #include "internal.h"
  58 #include "bytestream.h"
  59 #include <limits.h>
  60 #include "sp5x.h"
  61
  62 #define QUANT_BIAS_SHIFT 8
  63
  64 #define QMAT_SHIFT_MMX 16
  65 #define QMAT_SHIFT 21
  66
  67 static int encode_picture(MpegEncContext *s, int picture_number);
  68 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
  69 static int sse_mb(MpegEncContext *s);
  70 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
  71 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
  72
  73 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
  74 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
  75
  76 const AVOption ff_mpv_generic_options[] = {
  77     FF_MPV_COMMON_OPTS
  78     { NULL },
  79 };
  80
  81 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
  82                        uint16_t (*qmat16)[2][64],
  83                        const uint16_t *quant_matrix,
  84                        int bias, int qmin, int qmax, int intra)
  85 {
  86     FDCTDSPContext *fdsp = &s->fdsp;
  87     int qscale;
  88     int shift = 0;
  89
  90     for (qscale = qmin; qscale <= qmax; qscale++) {
  91         int i;
  92         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
  93 #if CONFIG_FAANDCT
  94             fdsp->fdct == ff_faandct            ||
  95 #endif /* CONFIG_FAANDCT */
  96             fdsp->fdct == ff_jpeg_fdct_islow_10) {
  97             for (i = 0; i < 64; i++) {
  98                 const int j = s->idsp.idct_permutation[i];
  99                 int64_t den = (int64_t) qscale * quant_matrix[j];
 100                 /* 16 <= qscale * quant_matrix[i] <= 7905
 101                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 102                  *             19952 <=              x  <= 249205026
 103                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 104                  *           3444240 >= (1 << 36) / (x) >= 275 */
 105
 106                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 107             }
 108         } else if (fdsp->fdct == ff_fdct_ifast) {
 109             for (i = 0; i < 64; i++) {
 110                 const int j = s->idsp.idct_permutation[i];
 111                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
 112                 /* 16 <= qscale * quant_matrix[i] <= 7905
 113                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 114                  *             19952 <=              x  <= 249205026
 115                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 116                  *           3444240 >= (1 << 36) / (x) >= 275 */
 117
 118                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
 119             }
 120         } else {
 121             for (i = 0; i < 64; i++) {
 122                 const int j = s->idsp.idct_permutation[i];
 123                 int64_t den = (int64_t) qscale * quant_matrix[j];
 124                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 125                  * Assume x = qscale * quant_matrix[i]
 126                  * So             16 <=              x  <= 7905
 127                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 128                  * so          32768 >= (1 << 19) / (x) >= 67 */
 129                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 130                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 131                 //                    (qscale * quant_matrix[i]);
 132                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
 133
 134                 if (qmat16[qscale][0][i] == 0 ||
 135                     qmat16[qscale][0][i] == 128 * 256)
 136                     qmat16[qscale][0][i] = 128 * 256 - 1;
 137                 qmat16[qscale][1][i] =
 138                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 139                                 qmat16[qscale][0][i]);
 140             }
 141         }
 142
 143         for (i = intra; i < 64; i++) {
 144             int64_t max = 8191;
 145             if (fdsp->fdct == ff_fdct_ifast) {
 146                 max = (8191LL * ff_aanscales[i]) >> 14;
 147             }
 148             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 149                 shift++;
 150             }
 151         }
 152     }
 153     if (shift) {
 154         av_log(NULL, AV_LOG_INFO,
 155                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 156                QMAT_SHIFT - shift);
 157     }
 158 }
 159
 160 static inline void update_qscale(MpegEncContext *s)
 161 {
 162     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 163                 (FF_LAMBDA_SHIFT + 7);
 164     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 165
 166     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 167                  FF_LAMBDA_SHIFT;
 168 }
 169
 170 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 171 {
 172     int i;
 173
 174     if (matrix) {
 175         put_bits(pb, 1, 1);
 176         for (i = 0; i < 64; i++) {
 177             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 178         }
 179     } else
 180         put_bits(pb, 1, 0);
 181 }
 182
 183 /**
 184  * init s->current_picture.qscale_table from s->lambda_table
 185  */
 186 void ff_init_qscale_tab(MpegEncContext *s)
 187 {
 188     int8_t * const qscale_table = s->current_picture.qscale_table;
 189     int i;
 190
 191     for (i = 0; i < s->mb_num; i++) {
 192         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 193         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 194         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 195                                                   s->avctx->qmax);
 196     }
 197 }
 198
 199 static void update_duplicate_context_after_me(MpegEncContext *dst,
 200                                               MpegEncContext *src)
 201 {
 202 #define COPY(a) dst->a= src->a
 203     COPY(pict_type);
 204     COPY(current_picture);
 205     COPY(f_code);
 206     COPY(b_code);
 207     COPY(qscale);
 208     COPY(lambda);
 209     COPY(lambda2);
 210     COPY(picture_in_gop_number);
 211     COPY(gop_picture_number);
 212     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 213     COPY(progressive_frame);    // FIXME don't set in encode_header
 214     COPY(partitioned_frame);    // FIXME don't set in encode_header
 215 #undef COPY
 216 }
 217
 218 /**
 219  * Set the given MpegEncContext to defaults for encoding.
 220  * the changed fields will not depend upon the prior state of the MpegEncContext.
 221  */
 222 static void mpv_encode_defaults(MpegEncContext *s)
 223 {
 224     int i;
 225     ff_mpv_common_defaults(s);
 226
 227     for (i = -16; i < 16; i++) {
 228         default_fcode_tab[i + MAX_MV] = 1;
 229     }
 230     s->me.mv_penalty = default_mv_penalty;
 231     s->fcode_tab     = default_fcode_tab;
 232
 233     s->input_picture_number  = 0;
 234     s->picture_in_gop_number = 0;
 235 }
 236
 237 av_cold int ff_dct_encode_init(MpegEncContext *s) {
 238     if (ARCH_X86)
 239         ff_dct_encode_init_x86(s);
 240
 241     if (CONFIG_H263_ENCODER)
 242         ff_h263dsp_init(&s->h263dsp);
 243     if (!s->dct_quantize)
 244         s->dct_quantize = ff_dct_quantize_c;
 245     if (!s->denoise_dct)
 246         s->denoise_dct  = denoise_dct_c;
 247     s->fast_dct_quantize = s->dct_quantize;
 248     if (s->avctx->trellis)
 249         s->dct_quantize  = dct_quantize_trellis_c;
 250
 251     return 0;
 252 }
 253
 254 /* init video encoder */
 255 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
 256 {
 257     MpegEncContext *s = avctx->priv_data;
 258     int i, ret, format_supported;
 259
 260     mpv_encode_defaults(s);
 261
 262     switch (avctx->codec_id) {
 263     case AV_CODEC_ID_MPEG2VIDEO:
 264         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 265             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 266             av_log(avctx, AV_LOG_ERROR,
 267                    "only YUV420 and YUV422 are supported\n");
 268             return -1;
 269         }
 270         break;
 271     case AV_CODEC_ID_MJPEG:
 272     case AV_CODEC_ID_AMV:
 273         format_supported = 0;
 274         /* JPEG color space */
 275         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
 276             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
 277             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
 278             (avctx->color_range == AVCOL_RANGE_JPEG &&
 279              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 280               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 281               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
 282             format_supported = 1;
 283         /* MPEG color space */
 284         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
 285                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 286                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 287                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
 288             format_supported = 1;
 289
 290         if (!format_supported) {
 291             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 292             return -1;
 293         }
 294         break;
 295     default:
 296         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 297             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 298             return -1;
 299         }
 300     }
 301
 302     switch (avctx->pix_fmt) {
 303     case AV_PIX_FMT_YUVJ444P:
 304     case AV_PIX_FMT_YUV444P:
 305         s->chroma_format = CHROMA_444;
 306         break;
 307     case AV_PIX_FMT_YUVJ422P:
 308     case AV_PIX_FMT_YUV422P:
 309         s->chroma_format = CHROMA_422;
 310         break;
 311     case AV_PIX_FMT_YUVJ420P:
 312     case AV_PIX_FMT_YUV420P:
 313     default:
 314         s->chroma_format = CHROMA_420;
 315         break;
 316     }
 317
 318     s->bit_rate = avctx->bit_rate;
 319     s->width    = avctx->width;
 320     s->height   = avctx->height;
 321     if (avctx->gop_size > 600 &&
 322         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 323         av_log(avctx, AV_LOG_WARNING,
 324                "keyframe interval too large!, reducing it from %d to %d\n",
 325                avctx->gop_size, 600);
 326         avctx->gop_size = 600;
 327     }
 328     s->gop_size     = avctx->gop_size;
 329     s->avctx        = avctx;
 330     s->flags        = avctx->flags;
 331     s->flags2       = avctx->flags2;
 332     if (avctx->max_b_frames > MAX_B_FRAMES) {
 333         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 334                "is %d.\n", MAX_B_FRAMES);
 335         avctx->max_b_frames = MAX_B_FRAMES;
 336     }
 337     s->max_b_frames = avctx->max_b_frames;
 338     s->codec_id     = avctx->codec->id;
 339     s->strict_std_compliance = avctx->strict_std_compliance;
 340     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 341     s->mpeg_quant         = avctx->mpeg_quant;
 342     s->rtp_mode           = !!avctx->rtp_payload_size;
 343     s->intra_dc_precision = avctx->intra_dc_precision;
 344
 345     // workaround some differences between how applications specify dc precision
 346     if (s->intra_dc_precision < 0) {
 347         s->intra_dc_precision += 8;
 348     } else if (s->intra_dc_precision >= 8)
 349         s->intra_dc_precision -= 8;
 350
 351     if (s->intra_dc_precision < 0) {
 352         av_log(avctx, AV_LOG_ERROR,
 353                 "intra dc precision must be positive, note some applications use"
 354                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
 355         return AVERROR(EINVAL);
 356     }
 357
 358     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
 359         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
 360         return AVERROR(EINVAL);
 361     }
 362     s->user_specified_pts = AV_NOPTS_VALUE;
 363
 364     if (s->gop_size <= 1) {
 365         s->intra_only = 1;
 366         s->gop_size   = 12;
 367     } else {
 368         s->intra_only = 0;
 369     }
 370
 371     s->me_method = avctx->me_method;
 372
 373     /* Fixed QSCALE */
 374     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 375
 376 #if FF_API_MPV_OPT
 377     FF_DISABLE_DEPRECATION_WARNINGS
 378     if (avctx->border_masking != 0.0)
 379         s->border_masking = avctx->border_masking;
 380     FF_ENABLE_DEPRECATION_WARNINGS
 381 #endif
 382
 383     s->adaptive_quant = (s->avctx->lumi_masking ||
 384                          s->avctx->dark_masking ||
 385                          s->avctx->temporal_cplx_masking ||
 386                          s->avctx->spatial_cplx_masking  ||
 387                          s->avctx->p_masking      ||
 388                          s->border_masking ||
 389                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 390                         !s->fixed_qscale;
 391
 392     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
 393
 394     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 395         switch(avctx->codec_id) {
 396         case AV_CODEC_ID_MPEG1VIDEO:
 397         case AV_CODEC_ID_MPEG2VIDEO:
 398             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
 399             break;
 400         case AV_CODEC_ID_MPEG4:
 401         case AV_CODEC_ID_MSMPEG4V1:
 402         case AV_CODEC_ID_MSMPEG4V2:
 403         case AV_CODEC_ID_MSMPEG4V3:
 404             if       (avctx->rc_max_rate >= 15000000) {
 405                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
 406             } else if(avctx->rc_max_rate >=  2000000) {
 407                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
 408             } else if(avctx->rc_max_rate >=   384000) {
 409                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
 410             } else
 411                 avctx->rc_buffer_size = 40;
 412             avctx->rc_buffer_size *= 16384;
 413             break;
 414         }
 415         if (avctx->rc_buffer_size) {
 416             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
 417         }
 418     }
 419
 420     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
 421         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
 422         return -1;
 423     }
 424
 425     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 426         av_log(avctx, AV_LOG_INFO,
 427                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 428     }
 429
 430     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 431         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 432         return -1;
 433     }
 434
 435     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 436         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
 437         return -1;
 438     }
 439
 440     if (avctx->rc_max_rate &&
 441         avctx->rc_max_rate == avctx->bit_rate &&
 442         avctx->rc_max_rate != avctx->rc_min_rate) {
 443         av_log(avctx, AV_LOG_INFO,
 444                "impossible bitrate constraints, this will fail\n");
 445     }
 446
 447     if (avctx->rc_buffer_size &&
 448         avctx->bit_rate * (int64_t)avctx->time_base.num >
 449             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 450         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 451         return -1;
 452     }
 453
 454     if (!s->fixed_qscale &&
 455         avctx->bit_rate * av_q2d(avctx->time_base) >
 456             avctx->bit_rate_tolerance) {
 457         av_log(avctx, AV_LOG_WARNING,
 458                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
 459         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
 460     }
 461
 462     if (s->avctx->rc_max_rate &&
 463         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 464         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 465          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 466         90000LL * (avctx->rc_buffer_size - 1) >
 467             s->avctx->rc_max_rate * 0xFFFFLL) {
 468         av_log(avctx, AV_LOG_INFO,
 469                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 470                "specified vbv buffer is too large for the given bitrate!\n");
 471     }
 472
 473     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
 474         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 475         s->codec_id != AV_CODEC_ID_FLV1) {
 476         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 477         return -1;
 478     }
 479
 480     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 481         av_log(avctx, AV_LOG_ERROR,
 482                "OBMC is only supported with simple mb decision\n");
 483         return -1;
 484     }
 485
 486     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 487         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 488         return -1;
 489     }
 490
 491     if (s->max_b_frames                    &&
 492         s->codec_id != AV_CODEC_ID_MPEG4      &&
 493         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 494         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 495         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 496         return -1;
 497     }
 498     if (s->max_b_frames < 0) {
 499         av_log(avctx, AV_LOG_ERROR,
 500                "max b frames must be 0 or positive for mpegvideo based encoders\n");
 501         return -1;
 502     }
 503
 504     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 505          s->codec_id == AV_CODEC_ID_H263  ||
 506          s->codec_id == AV_CODEC_ID_H263P) &&
 507         (avctx->sample_aspect_ratio.num > 255 ||
 508          avctx->sample_aspect_ratio.den > 255)) {
 509         av_log(avctx, AV_LOG_WARNING,
 510                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
 511                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 512         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
 513                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
 514     }
 515
 516     if ((s->codec_id == AV_CODEC_ID_H263  ||
 517          s->codec_id == AV_CODEC_ID_H263P) &&
 518         (avctx->width  > 2048 ||
 519          avctx->height > 1152 )) {
 520         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
 521         return -1;
 522     }
 523     if ((s->codec_id == AV_CODEC_ID_H263  ||
 524          s->codec_id == AV_CODEC_ID_H263P) &&
 525         ((avctx->width &3) ||
 526          (avctx->height&3) )) {
 527         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
 528         return -1;
 529     }
 530
 531     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
 532         (avctx->width  > 4095 ||
 533          avctx->height > 4095 )) {
 534         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
 535         return -1;
 536     }
 537
 538     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
 539         (avctx->width  > 16383 ||
 540          avctx->height > 16383 )) {
 541         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
 542         return -1;
 543     }
 544
 545     if (s->codec_id == AV_CODEC_ID_RV10 &&
 546         (avctx->width &15 ||
 547          avctx->height&15 )) {
 548         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
 549         return AVERROR(EINVAL);
 550     }
 551
 552     if (s->codec_id == AV_CODEC_ID_RV20 &&
 553         (avctx->width &3 ||
 554          avctx->height&3 )) {
 555         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
 556         return AVERROR(EINVAL);
 557     }
 558
 559     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
 560          s->codec_id == AV_CODEC_ID_WMV2) &&
 561          avctx->width & 1) {
 562          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
 563          return -1;
 564     }
 565
 566     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 567         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 568         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 569         return -1;
 570     }
 571
 572     // FIXME mpeg2 uses that too
 573     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
 574                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
 575         av_log(avctx, AV_LOG_ERROR,
 576                "mpeg2 style quantization not supported by codec\n");
 577         return -1;
 578     }
 579
 580     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 581         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 582         return -1;
 583     }
 584
 585     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 586         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 587         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 588         return -1;
 589     }
 590
 591     if (s->avctx->scenechange_threshold < 1000000000 &&
 592         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
 593         av_log(avctx, AV_LOG_ERROR,
 594                "closed gop with scene change detection are not supported yet, "
 595                "set threshold to 1000000000\n");
 596         return -1;
 597     }
 598
 599     if (s->flags & CODEC_FLAG_LOW_DELAY) {
 600         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 601             av_log(avctx, AV_LOG_ERROR,
 602                   "low delay forcing is only available for mpeg2\n");
 603             return -1;
 604         }
 605         if (s->max_b_frames != 0) {
 606             av_log(avctx, AV_LOG_ERROR,
 607                    "b frames cannot be used with low delay\n");
 608             return -1;
 609         }
 610     }
 611
 612     if (s->q_scale_type == 1) {
 613         if (avctx->qmax > 12) {
 614             av_log(avctx, AV_LOG_ERROR,
 615                    "non linear quant only supports qmax <= 12 currently\n");
 616             return -1;
 617         }
 618     }
 619
 620     if (s->avctx->thread_count > 1         &&
 621         s->codec_id != AV_CODEC_ID_MPEG4      &&
 622         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 623         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 624         s->codec_id != AV_CODEC_ID_MJPEG      &&
 625         (s->codec_id != AV_CODEC_ID_H263P)) {
 626         av_log(avctx, AV_LOG_ERROR,
 627                "multi threaded encoding not supported by codec\n");
 628         return -1;
 629     }
 630
 631     if (s->avctx->thread_count < 1) {
 632         av_log(avctx, AV_LOG_ERROR,
 633                "automatic thread number detection not supported by codec, "
 634                "patch welcome\n");
 635         return -1;
 636     }
 637
 638     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
 639         s->rtp_mode = 1;
 640
 641     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
 642         s->h263_slice_structured = 1;
 643
 644     if (!avctx->time_base.den || !avctx->time_base.num) {
 645         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 646         return -1;
 647     }
 648
 649     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 650         av_log(avctx, AV_LOG_INFO,
 651                "notice: b_frame_strategy only affects the first pass\n");
 652         avctx->b_frame_strategy = 0;
 653     }
 654
 655     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 656     if (i > 1) {
 657         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 658         avctx->time_base.den /= i;
 659         avctx->time_base.num /= i;
 660         //return -1;
 661     }
 662
 663     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
 664         // (a + x * 3 / 8) / x
 665         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 666         s->inter_quant_bias = 0;
 667     } else {
 668         s->intra_quant_bias = 0;
 669         // (a - x / 4) / x
 670         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 671     }
 672
 673     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
 674         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
 675         return AVERROR(EINVAL);
 676     }
 677
 678     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 679         s->intra_quant_bias = avctx->intra_quant_bias;
 680     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 681         s->inter_quant_bias = avctx->inter_quant_bias;
 682
 683     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
 684
 685     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 686         s->avctx->time_base.den > (1 << 16) - 1) {
 687         av_log(avctx, AV_LOG_ERROR,
 688                "timebase %d/%d not supported by MPEG 4 standard, "
 689                "the maximum admitted value for the timebase denominator "
 690                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 691                (1 << 16) - 1);
 692         return -1;
 693     }
 694     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 695
 696     switch (avctx->codec->id) {
 697     case AV_CODEC_ID_MPEG1VIDEO:
 698         s->out_format = FMT_MPEG1;
 699         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 700         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 701         break;
 702     case AV_CODEC_ID_MPEG2VIDEO:
 703         s->out_format = FMT_MPEG1;
 704         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 705         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 706         s->rtp_mode   = 1;
 707         break;
 708     case AV_CODEC_ID_MJPEG:
 709     case AV_CODEC_ID_AMV:
 710         s->out_format = FMT_MJPEG;
 711         s->intra_only = 1; /* force intra only for jpeg */
 712         if (!CONFIG_MJPEG_ENCODER ||
 713             ff_mjpeg_encode_init(s) < 0)
 714             return -1;
 715         avctx->delay = 0;
 716         s->low_delay = 1;
 717         break;
 718     case AV_CODEC_ID_H261:
 719         if (!CONFIG_H261_ENCODER)
 720             return -1;
 721         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 722             av_log(avctx, AV_LOG_ERROR,
 723                    "The specified picture size of %dx%d is not valid for the "
 724                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 725                     s->width, s->height);
 726             return -1;
 727         }
 728         s->out_format = FMT_H261;
 729         avctx->delay  = 0;
 730         s->low_delay  = 1;
 731         s->rtp_mode   = 0; /* Sliced encoding not supported */
 732         break;
 733     case AV_CODEC_ID_H263:
 734         if (!CONFIG_H263_ENCODER)
 735             return -1;
 736         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 737                              s->width, s->height) == 8) {
 738             av_log(avctx, AV_LOG_ERROR,
 739                    "The specified picture size of %dx%d is not valid for "
 740                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 741                    "352x288, 704x576, and 1408x1152. "
 742                    "Try H.263+.\n", s->width, s->height);
 743             return -1;
 744         }
 745         s->out_format = FMT_H263;
 746         avctx->delay  = 0;
 747         s->low_delay  = 1;
 748         break;
 749     case AV_CODEC_ID_H263P:
 750         s->out_format = FMT_H263;
 751         s->h263_plus  = 1;
 752         /* Fx */
 753         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 754         s->modified_quant  = s->h263_aic;
 755         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 756         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 757
 758         /* /Fx */
 759         /* These are just to be sure */
 760         avctx->delay = 0;
 761         s->low_delay = 1;
 762         break;
 763     case AV_CODEC_ID_FLV1:
 764         s->out_format      = FMT_H263;
 765         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 766         s->unrestricted_mv = 1;
 767         s->rtp_mode  = 0; /* don't allow GOB */
 768         avctx->delay = 0;
 769         s->low_delay = 1;
 770         break;
 771     case AV_CODEC_ID_RV10:
 772         s->out_format = FMT_H263;
 773         avctx->delay  = 0;
 774         s->low_delay  = 1;
 775         break;
 776     case AV_CODEC_ID_RV20:
 777         s->out_format      = FMT_H263;
 778         avctx->delay       = 0;
 779         s->low_delay       = 1;
 780         s->modified_quant  = 1;
 781         s->h263_aic        = 1;
 782         s->h263_plus       = 1;
 783         s->loop_filter     = 1;
 784         s->unrestricted_mv = 0;
 785         break;
 786     case AV_CODEC_ID_MPEG4:
 787         s->out_format      = FMT_H263;
 788         s->h263_pred       = 1;
 789         s->unrestricted_mv = 1;
 790         s->low_delay       = s->max_b_frames ? 0 : 1;
 791         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 792         break;
 793     case AV_CODEC_ID_MSMPEG4V2:
 794         s->out_format      = FMT_H263;
 795         s->h263_pred       = 1;
 796         s->unrestricted_mv = 1;
 797         s->msmpeg4_version = 2;
 798         avctx->delay       = 0;
 799         s->low_delay       = 1;
 800         break;
 801     case AV_CODEC_ID_MSMPEG4V3:
 802         s->out_format        = FMT_H263;
 803         s->h263_pred         = 1;
 804         s->unrestricted_mv   = 1;
 805         s->msmpeg4_version   = 3;
 806         s->flipflop_rounding = 1;
 807         avctx->delay         = 0;
 808         s->low_delay         = 1;
 809         break;
 810     case AV_CODEC_ID_WMV1:
 811         s->out_format        = FMT_H263;
 812         s->h263_pred         = 1;
 813         s->unrestricted_mv   = 1;
 814         s->msmpeg4_version   = 4;
 815         s->flipflop_rounding = 1;
 816         avctx->delay         = 0;
 817         s->low_delay         = 1;
 818         break;
 819     case AV_CODEC_ID_WMV2:
 820         s->out_format        = FMT_H263;
 821         s->h263_pred         = 1;
 822         s->unrestricted_mv   = 1;
 823         s->msmpeg4_version   = 5;
 824         s->flipflop_rounding = 1;
 825         avctx->delay         = 0;
 826         s->low_delay         = 1;
 827         break;
 828     default:
 829         return -1;
 830     }
 831
 832     avctx->has_b_frames = !s->low_delay;
 833
 834     s->encoding = 1;
 835
 836     s->progressive_frame    =
 837     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 838                                                 CODEC_FLAG_INTERLACED_ME) ||
 839                                 s->alternate_scan);
 840
 841     /* init */
 842     ff_mpv_idct_init(s);
 843     if (ff_mpv_common_init(s) < 0)
 844         return -1;
 845
 846     ff_fdctdsp_init(&s->fdsp, avctx);
 847     ff_me_cmp_init(&s->mecc, avctx);
 848     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 849     ff_pixblockdsp_init(&s->pdsp, avctx);
 850     ff_qpeldsp_init(&s->qdsp);
 851
 852     s->avctx->coded_frame = s->current_picture.f;
 853
 854     if (s->msmpeg4_version) {
 855         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 856                           2 * 2 * (MAX_LEVEL + 1) *
 857                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 858     }
 859     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 860
 861     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 862     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
 863     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 864     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 865     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 866     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 867     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 868                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 869     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 870                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 871
 872     if (s->avctx->noise_reduction) {
 873         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 874                           2 * 64 * sizeof(uint16_t), fail);
 875     }
 876
 877     ff_dct_encode_init(s);
 878
 879     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 880         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 881
 882     s->quant_precision = 5;
 883
 884     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
 885     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
 886
 887     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 888         ff_h261_encode_init(s);
 889     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 890         ff_h263_encode_init(s);
 891     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 892         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
 893             return ret;
 894     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 895         && s->out_format == FMT_MPEG1)
 896         ff_mpeg1_encode_init(s);
 897
 898     /* init q matrix */
 899     for (i = 0; i < 64; i++) {
 900         int j = s->idsp.idct_permutation[i];
 901         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 902             s->mpeg_quant) {
 903             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 904             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 905         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 906             s->intra_matrix[j] =
 907             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 908         } else {
 909             /* mpeg1/2 */
 910             s->chroma_intra_matrix[j] =
 911             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 912             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 913         }
 914         if (s->avctx->intra_matrix)
 915             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 916         if (s->avctx->inter_matrix)
 917             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 918     }
 919
 920     /* precompute matrix */
 921     /* for mjpeg, we do include qscale in the matrix */
 922     if (s->out_format != FMT_MJPEG) {
 923         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
 924                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 925                           31, 1);
 926         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
 927                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 928                           31, 0);
 929     }
 930
 931     if (ff_rate_control_init(s) < 0)
 932         return -1;
 933
 934 #if FF_API_ERROR_RATE
 935     FF_DISABLE_DEPRECATION_WARNINGS
 936     if (avctx->error_rate)
 937         s->error_rate = avctx->error_rate;
 938     FF_ENABLE_DEPRECATION_WARNINGS;
 939 #endif
 940
 941 #if FF_API_NORMALIZE_AQP
 942     FF_DISABLE_DEPRECATION_WARNINGS
 943     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 944         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 945     FF_ENABLE_DEPRECATION_WARNINGS;
 946 #endif
 947
 948 #if FF_API_MV0
 949     FF_DISABLE_DEPRECATION_WARNINGS
 950     if (avctx->flags & CODEC_FLAG_MV0)
 951         s->mpv_flags |= FF_MPV_FLAG_MV0;
 952     FF_ENABLE_DEPRECATION_WARNINGS
 953 #endif
 954
 955 #if FF_API_MPV_OPT
 956     FF_DISABLE_DEPRECATION_WARNINGS
 957     if (avctx->rc_qsquish != 0.0)
 958         s->rc_qsquish = avctx->rc_qsquish;
 959     if (avctx->rc_qmod_amp != 0.0)
 960         s->rc_qmod_amp = avctx->rc_qmod_amp;
 961     if (avctx->rc_qmod_freq)
 962         s->rc_qmod_freq = avctx->rc_qmod_freq;
 963     if (avctx->rc_buffer_aggressivity != 1.0)
 964         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
 965     if (avctx->rc_initial_cplx != 0.0)
 966         s->rc_initial_cplx = avctx->rc_initial_cplx;
 967     if (avctx->lmin)
 968         s->lmin = avctx->lmin;
 969     if (avctx->lmax)
 970         s->lmax = avctx->lmax;
 971
 972     if (avctx->rc_eq) {
 973         av_freep(&s->rc_eq);
 974         s->rc_eq = av_strdup(avctx->rc_eq);
 975         if (!s->rc_eq)
 976             return AVERROR(ENOMEM);
 977     }
 978     FF_ENABLE_DEPRECATION_WARNINGS
 979 #endif
 980
 981     if (avctx->b_frame_strategy == 2) {
 982         for (i = 0; i < s->max_b_frames + 2; i++) {
 983             s->tmp_frames[i] = av_frame_alloc();
 984             if (!s->tmp_frames[i])
 985                 return AVERROR(ENOMEM);
 986
 987             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 988             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
 989             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
 990
 991             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 992             if (ret < 0)
 993                 return ret;
 994         }
 995     }
 996
 997     return 0;
 998 fail:
 999     ff_mpv_encode_end(avctx);
1000     return AVERROR_UNKNOWN;
1001 }
1002
1003 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
1004 {
1005     MpegEncContext *s = avctx->priv_data;
1006     int i;
1007
1008     ff_rate_control_uninit(s);
1009
1010     ff_mpv_common_end(s);
1011     if (CONFIG_MJPEG_ENCODER &&
1012         s->out_format == FMT_MJPEG)
1013         ff_mjpeg_encode_close(s);
1014
1015     av_freep(&avctx->extradata);
1016
1017     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1018         av_frame_free(&s->tmp_frames[i]);
1019
1020     ff_free_picture_tables(&s->new_picture);
1021     ff_mpeg_unref_picture(s, &s->new_picture);
1022
1023     av_freep(&s->avctx->stats_out);
1024     av_freep(&s->ac_stats);
1025
1026     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
1027     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1028     s->q_chroma_intra_matrix=   NULL;
1029     s->q_chroma_intra_matrix16= NULL;
1030     av_freep(&s->q_intra_matrix);
1031     av_freep(&s->q_inter_matrix);
1032     av_freep(&s->q_intra_matrix16);
1033     av_freep(&s->q_inter_matrix16);
1034     av_freep(&s->input_picture);
1035     av_freep(&s->reordered_input_picture);
1036     av_freep(&s->dct_offset);
1037
1038     return 0;
1039 }
1040
/**
 * Sum of absolute differences between a 16x16 block and a constant value.
 *
 * @param src    top-left sample of the 16x16 block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between the starts of two block rows
 * @return sum over all 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
1054
1055 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1056                            uint8_t *ref, int stride)
1057 {
1058     int x, y, w, h;
1059     int acc = 0;
1060
1061     w = s->width  & ~15;
1062     h = s->height & ~15;
1063
1064     for (y = 0; y < h; y += 16) {
1065         for (x = 0; x < w; x += 16) {
1066             int offset = x + y * stride;
1067             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1068                                       stride, 16);
1069             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1070             int sae  = get_sae(src + offset, mean, stride);
1071
1072             acc += sae + 500 < sad;
1073         }
1074     }
1075     return acc;
1076 }
1077
1078
1079 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1080 {
1081     Picture *pic = NULL;
1082     int64_t pts;
1083     int i, display_picture_number = 0, ret;
1084     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1085                                                  (s->low_delay ? 0 : 1);
1086     int direct = 1;
1087
1088     if (pic_arg) {
1089         pts = pic_arg->pts;
1090         display_picture_number = s->input_picture_number++;
1091
1092         if (pts != AV_NOPTS_VALUE) {
1093             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1094                 int64_t last = s->user_specified_pts;
1095
1096                 if (pts <= last) {
1097                     av_log(s->avctx, AV_LOG_ERROR,
1098                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1099                            pts, last);
1100                     return AVERROR(EINVAL);
1101                 }
1102
1103                 if (!s->low_delay && display_picture_number == 1)
1104                     s->dts_delta = pts - last;
1105             }
1106             s->user_specified_pts = pts;
1107         } else {
1108             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1109                 s->user_specified_pts =
1110                 pts = s->user_specified_pts + 1;
1111                 av_log(s->avctx, AV_LOG_INFO,
1112                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1113                        pts);
1114             } else {
1115                 pts = display_picture_number;
1116             }
1117         }
1118     }
1119
1120     if (pic_arg) {
1121         if (!pic_arg->buf[0] ||
1122             pic_arg->linesize[0] != s->linesize ||
1123             pic_arg->linesize[1] != s->uvlinesize ||
1124             pic_arg->linesize[2] != s->uvlinesize)
1125             direct = 0;
1126         if ((s->width & 15) || (s->height & 15))
1127             direct = 0;
1128         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1129             direct = 0;
1130         if (s->linesize & (STRIDE_ALIGN-1))
1131             direct = 0;
1132
1133         av_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1134                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1135
1136         i = ff_find_unused_picture(s, direct);
1137         if (i < 0)
1138             return i;
1139
1140         pic = &s->picture[i];
1141         pic->reference = 3;
1142
1143         if (direct) {
1144             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1145                 return ret;
1146             if (ff_alloc_picture(s, pic, 1) < 0) {
1147                 return -1;
1148             }
1149         } else {
1150             if (ff_alloc_picture(s, pic, 0) < 0) {
1151                 return -1;
1152             }
1153
1154             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1155                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1156                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1157                 // empty
1158             } else {
1159                 int h_chroma_shift, v_chroma_shift;
1160                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1161                                                  &h_chroma_shift,
1162                                                  &v_chroma_shift);
1163
1164                 for (i = 0; i < 3; i++) {
1165                     int src_stride = pic_arg->linesize[i];
1166                     int dst_stride = i ? s->uvlinesize : s->linesize;
1167                     int h_shift = i ? h_chroma_shift : 0;
1168                     int v_shift = i ? v_chroma_shift : 0;
1169                     int w = s->width  >> h_shift;
1170                     int h = s->height >> v_shift;
1171                     uint8_t *src = pic_arg->data[i];
1172                     uint8_t *dst = pic->f->data[i];
1173                     int vpad = 16;
1174
1175                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1176                         && !s->progressive_sequence
1177                         && FFALIGN(s->height, 32) - s->height > 16)
1178                         vpad = 32;
1179
1180                     if (!s->avctx->rc_buffer_size)
1181                         dst += INPLACE_OFFSET;
1182
1183                     if (src_stride == dst_stride)
1184                         memcpy(dst, src, src_stride * h);
1185                     else {
1186                         int h2 = h;
1187                         uint8_t *dst2 = dst;
1188                         while (h2--) {
1189                             memcpy(dst2, src, w);
1190                             dst2 += dst_stride;
1191                             src += src_stride;
1192                         }
1193                     }
1194                     if ((s->width & 15) || (s->height & (vpad-1))) {
1195                         s->mpvencdsp.draw_edges(dst, dst_stride,
1196                                                 w, h,
1197                                                 16 >> h_shift,
1198                                                 vpad >> v_shift,
1199                                                 EDGE_BOTTOM);
1200                     }
1201                 }
1202             }
1203         }
1204         ret = av_frame_copy_props(pic->f, pic_arg);
1205         if (ret < 0)
1206             return ret;
1207
1208         pic->f->display_picture_number = display_picture_number;
1209         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1210     }
1211
1212     /* shift buffer entries */
1213     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1214         s->input_picture[i - 1] = s->input_picture[i];
1215
1216     s->input_picture[encoding_delay] = (Picture*) pic;
1217
1218     return 0;
1219 }
1220
1221 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1222 {
1223     int x, y, plane;
1224     int score = 0;
1225     int64_t score64 = 0;
1226
1227     for (plane = 0; plane < 3; plane++) {
1228         const int stride = p->f->linesize[plane];
1229         const int bw = plane ? 1 : 2;
1230         for (y = 0; y < s->mb_height * bw; y++) {
1231             for (x = 0; x < s->mb_width * bw; x++) {
1232                 int off = p->shared ? 0 : 16;
1233                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1234                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1235                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1236
1237                 switch (FFABS(s->avctx->frame_skip_exp)) {
1238                 case 0: score    =  FFMAX(score, v);          break;
1239                 case 1: score   += FFABS(v);                  break;
1240                 case 2: score64 += v * (int64_t)v;                       break;
1241                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1242                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1243                 }
1244             }
1245         }
1246     }
1247     emms_c();
1248
1249     if (score)
1250         score64 = score;
1251     if (s->avctx->frame_skip_exp < 0)
1252         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1253                       -1.0/s->avctx->frame_skip_exp);
1254
1255     if (score64 < s->avctx->frame_skip_threshold)
1256         return 1;
1257     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1258         return 1;
1259     return 0;
1260 }
1261
1262 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1263 {
1264     AVPacket pkt = { 0 };
1265     int ret, got_output;
1266
1267     av_init_packet(&pkt);
1268     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1269     if (ret < 0)
1270         return ret;
1271
1272     ret = pkt.size;
1273     av_free_packet(&pkt);
1274     return ret;
1275 }
1276
1277 static int estimate_best_b_count(MpegEncContext *s)
1278 {
1279     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1280     AVCodecContext *c = avcodec_alloc_context3(NULL);
1281     const int scale = s->avctx->brd_scale;
1282     int i, j, out_size, p_lambda, b_lambda, lambda2;
1283     int64_t best_rd  = INT64_MAX;
1284     int best_b_count = -1;
1285
1286     av_assert0(scale >= 0 && scale <= 3);
1287
1288     //emms_c();
1289     //s->next_picture_ptr->quality;
1290     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1291     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1292     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1293     if (!b_lambda) // FIXME we should do this somewhere else
1294         b_lambda = p_lambda;
1295     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1296                FF_LAMBDA_SHIFT;
1297
1298     c->width        = s->width  >> scale;
1299     c->height       = s->height >> scale;
1300     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1301     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1302     c->mb_decision  = s->avctx->mb_decision;
1303     c->me_cmp       = s->avctx->me_cmp;
1304     c->mb_cmp       = s->avctx->mb_cmp;
1305     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1306     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1307     c->time_base    = s->avctx->time_base;
1308     c->max_b_frames = s->max_b_frames;
1309
1310     if (avcodec_open2(c, codec, NULL) < 0)
1311         return -1;
1312
1313     for (i = 0; i < s->max_b_frames + 2; i++) {
1314         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1315                                                 s->next_picture_ptr;
1316         uint8_t *data[4];
1317
1318         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1319             pre_input = *pre_input_ptr;
1320             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1321
1322             if (!pre_input.shared && i) {
1323                 data[0] += INPLACE_OFFSET;
1324                 data[1] += INPLACE_OFFSET;
1325                 data[2] += INPLACE_OFFSET;
1326             }
1327
1328             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1329                                        s->tmp_frames[i]->linesize[0],
1330                                        data[0],
1331                                        pre_input.f->linesize[0],
1332                                        c->width, c->height);
1333             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1334                                        s->tmp_frames[i]->linesize[1],
1335                                        data[1],
1336                                        pre_input.f->linesize[1],
1337                                        c->width >> 1, c->height >> 1);
1338             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1339                                        s->tmp_frames[i]->linesize[2],
1340                                        data[2],
1341                                        pre_input.f->linesize[2],
1342                                        c->width >> 1, c->height >> 1);
1343         }
1344     }
1345
1346     for (j = 0; j < s->max_b_frames + 1; j++) {
1347         int64_t rd = 0;
1348
1349         if (!s->input_picture[j])
1350             break;
1351
1352         c->error[0] = c->error[1] = c->error[2] = 0;
1353
1354         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1355         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1356
1357         out_size = encode_frame(c, s->tmp_frames[0]);
1358
1359         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1360
1361         for (i = 0; i < s->max_b_frames + 1; i++) {
1362             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1363
1364             s->tmp_frames[i + 1]->pict_type = is_p ?
1365                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1366             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1367
1368             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1369
1370             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1371         }
1372
1373         /* get the delayed frames */
1374         while (out_size) {
1375             out_size = encode_frame(c, NULL);
1376             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1377         }
1378
1379         rd += c->error[0] + c->error[1] + c->error[2];
1380
1381         if (rd < best_rd) {
1382             best_rd = rd;
1383             best_b_count = j;
1384         }
1385     }
1386
1387     avcodec_close(c);
1388     av_freep(&c);
1389
1390     return best_b_count;
1391 }
1392
1393 static int select_input_picture(MpegEncContext *s)
1394 {
1395     int i, ret;
1396
1397     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1398         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1399     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1400
1401     /* set next picture type & ordering */
1402     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1403         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1404             if (s->picture_in_gop_number < s->gop_size &&
1405                 s->next_picture_ptr &&
1406                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1407                 // FIXME check that te gop check above is +-1 correct
1408                 av_frame_unref(s->input_picture[0]->f);
1409
1410                 ff_vbv_update(s, 0);
1411
1412                 goto no_output_pic;
1413             }
1414         }
1415
1416         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1417             !s->next_picture_ptr || s->intra_only) {
1418             s->reordered_input_picture[0] = s->input_picture[0];
1419             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1420             s->reordered_input_picture[0]->f->coded_picture_number =
1421                 s->coded_picture_number++;
1422         } else {
1423             int b_frames;
1424
1425             if (s->flags & CODEC_FLAG_PASS2) {
1426                 for (i = 0; i < s->max_b_frames + 1; i++) {
1427                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1428
1429                     if (pict_num >= s->rc_context.num_entries)
1430                         break;
1431                     if (!s->input_picture[i]) {
1432                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1433                         break;
1434                     }
1435
1436                     s->input_picture[i]->f->pict_type =
1437                         s->rc_context.entry[pict_num].new_pict_type;
1438                 }
1439             }
1440
1441             if (s->avctx->b_frame_strategy == 0) {
1442                 b_frames = s->max_b_frames;
1443                 while (b_frames && !s->input_picture[b_frames])
1444                     b_frames--;
1445             } else if (s->avctx->b_frame_strategy == 1) {
1446                 for (i = 1; i < s->max_b_frames + 1; i++) {
1447                     if (s->input_picture[i] &&
1448                         s->input_picture[i]->b_frame_score == 0) {
1449                         s->input_picture[i]->b_frame_score =
1450                             get_intra_count(s,
1451                                             s->input_picture[i    ]->f->data[0],
1452                                             s->input_picture[i - 1]->f->data[0],
1453                                             s->linesize) + 1;
1454                     }
1455                 }
1456                 for (i = 0; i < s->max_b_frames + 1; i++) {
1457                     if (!s->input_picture[i] ||
1458                         s->input_picture[i]->b_frame_score - 1 >
1459                             s->mb_num / s->avctx->b_sensitivity)
1460                         break;
1461                 }
1462
1463                 b_frames = FFMAX(0, i - 1);
1464
1465                 /* reset scores */
1466                 for (i = 0; i < b_frames + 1; i++) {
1467                     s->input_picture[i]->b_frame_score = 0;
1468                 }
1469             } else if (s->avctx->b_frame_strategy == 2) {
1470                 b_frames = estimate_best_b_count(s);
1471             } else {
1472                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1473                 b_frames = 0;
1474             }
1475
1476             emms_c();
1477
1478             for (i = b_frames - 1; i >= 0; i--) {
1479                 int type = s->input_picture[i]->f->pict_type;
1480                 if (type && type != AV_PICTURE_TYPE_B)
1481                     b_frames = i;
1482             }
1483             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1484                 b_frames == s->max_b_frames) {
1485                 av_log(s->avctx, AV_LOG_ERROR,
1486                        "warning, too many b frames in a row\n");
1487             }
1488
1489             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1490                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1491                     s->gop_size > s->picture_in_gop_number) {
1492                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1493                 } else {
1494                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1495                         b_frames = 0;
1496                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1497                 }
1498             }
1499
1500             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1501                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1502                 b_frames--;
1503
1504             s->reordered_input_picture[0] = s->input_picture[b_frames];
1505             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1506                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1507             s->reordered_input_picture[0]->f->coded_picture_number =
1508                 s->coded_picture_number++;
1509             for (i = 0; i < b_frames; i++) {
1510                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1511                 s->reordered_input_picture[i + 1]->f->pict_type =
1512                     AV_PICTURE_TYPE_B;
1513                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1514                     s->coded_picture_number++;
1515             }
1516         }
1517     }
1518 no_output_pic:
1519     if (s->reordered_input_picture[0]) {
1520         s->reordered_input_picture[0]->reference =
1521            s->reordered_input_picture[0]->f->pict_type !=
1522                AV_PICTURE_TYPE_B ? 3 : 0;
1523
1524         ff_mpeg_unref_picture(s, &s->new_picture);
1525         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1526             return ret;
1527
1528         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1529             // input is a shared pix, so we can't modifiy it -> alloc a new
1530             // one & ensure that the shared one is reuseable
1531
1532             Picture *pic;
1533             int i = ff_find_unused_picture(s, 0);
1534             if (i < 0)
1535                 return i;
1536             pic = &s->picture[i];
1537
1538             pic->reference = s->reordered_input_picture[0]->reference;
1539             if (ff_alloc_picture(s, pic, 0) < 0) {
1540                 return -1;
1541             }
1542
1543             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1544             if (ret < 0)
1545                 return ret;
1546
1547             /* mark us unused / free shared pic */
1548             av_frame_unref(s->reordered_input_picture[0]->f);
1549             s->reordered_input_picture[0]->shared = 0;
1550
1551             s->current_picture_ptr = pic;
1552         } else {
1553             // input is not a shared pix -> reuse buffer for current_pix
1554             s->current_picture_ptr = s->reordered_input_picture[0];
1555             for (i = 0; i < 4; i++) {
1556                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1557             }
1558         }
1559         ff_mpeg_unref_picture(s, &s->current_picture);
1560         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1561                                        s->current_picture_ptr)) < 0)
1562             return ret;
1563
1564         s->picture_number = s->new_picture.f->display_picture_number;
1565     } else {
1566         ff_mpeg_unref_picture(s, &s->new_picture);
1567     }
1568     return 0;
1569 }
1570
1571 static void frame_end(MpegEncContext *s)
1572 {
1573     if (s->unrestricted_mv &&
1574         s->current_picture.reference &&
1575         !s->intra_only) {
1576         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1577         int hshift = desc->log2_chroma_w;
1578         int vshift = desc->log2_chroma_h;
1579         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1580                                 s->current_picture.f->linesize[0],
1581                                 s->h_edge_pos, s->v_edge_pos,
1582                                 EDGE_WIDTH, EDGE_WIDTH,
1583                                 EDGE_TOP | EDGE_BOTTOM);
1584         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1585                                 s->current_picture.f->linesize[1],
1586                                 s->h_edge_pos >> hshift,
1587                                 s->v_edge_pos >> vshift,
1588                                 EDGE_WIDTH >> hshift,
1589                                 EDGE_WIDTH >> vshift,
1590                                 EDGE_TOP | EDGE_BOTTOM);
1591         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1592                                 s->current_picture.f->linesize[2],
1593                                 s->h_edge_pos >> hshift,
1594                                 s->v_edge_pos >> vshift,
1595                                 EDGE_WIDTH >> hshift,
1596                                 EDGE_WIDTH >> vshift,
1597                                 EDGE_TOP | EDGE_BOTTOM);
1598     }
1599
1600     emms_c();
1601
1602     s->last_pict_type                 = s->pict_type;
1603     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1604     if (s->pict_type!= AV_PICTURE_TYPE_B)
1605         s->last_non_b_pict_type = s->pict_type;
1606
1607     s->avctx->coded_frame = s->current_picture_ptr->f;
1608
1609 }
1610
1611 static void update_noise_reduction(MpegEncContext *s)
1612 {
1613     int intra, i;
1614
1615     for (intra = 0; intra < 2; intra++) {
1616         if (s->dct_count[intra] > (1 << 16)) {
1617             for (i = 0; i < 64; i++) {
1618                 s->dct_error_sum[intra][i] >>= 1;
1619             }
1620             s->dct_count[intra] >>= 1;
1621         }
1622
1623         for (i = 0; i < 64; i++) {
1624             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1625                                        s->dct_count[intra] +
1626                                        s->dct_error_sum[intra][i] / 2) /
1627                                       (s->dct_error_sum[intra][i] + 1);
1628         }
1629     }
1630 }
1631
1632 static int frame_start(MpegEncContext *s)
1633 {
1634     int ret;
1635
1636     /* mark & release old frames */
1637     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1638         s->last_picture_ptr != s->next_picture_ptr &&
1639         s->last_picture_ptr->f->buf[0]) {
1640         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1641     }
1642
1643     s->current_picture_ptr->f->pict_type = s->pict_type;
1644     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1645
1646     ff_mpeg_unref_picture(s, &s->current_picture);
1647     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1648                                    s->current_picture_ptr)) < 0)
1649         return ret;
1650
1651     if (s->pict_type != AV_PICTURE_TYPE_B) {
1652         s->last_picture_ptr = s->next_picture_ptr;
1653         if (!s->droppable)
1654             s->next_picture_ptr = s->current_picture_ptr;
1655     }
1656
1657     if (s->last_picture_ptr) {
1658         ff_mpeg_unref_picture(s, &s->last_picture);
1659         if (s->last_picture_ptr->f->buf[0] &&
1660             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1661                                        s->last_picture_ptr)) < 0)
1662             return ret;
1663     }
1664     if (s->next_picture_ptr) {
1665         ff_mpeg_unref_picture(s, &s->next_picture);
1666         if (s->next_picture_ptr->f->buf[0] &&
1667             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1668                                        s->next_picture_ptr)) < 0)
1669             return ret;
1670     }
1671
1672     if (s->picture_structure!= PICT_FRAME) {
1673         int i;
1674         for (i = 0; i < 4; i++) {
1675             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1676                 s->current_picture.f->data[i] +=
1677                     s->current_picture.f->linesize[i];
1678             }
1679             s->current_picture.f->linesize[i] *= 2;
1680             s->last_picture.f->linesize[i]    *= 2;
1681             s->next_picture.f->linesize[i]    *= 2;
1682         }
1683     }
1684
1685     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1686         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1687         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1688     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1689         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1690         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1691     } else {
1692         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1693         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1694     }
1695
1696     if (s->dct_error_sum) {
1697         av_assert2(s->avctx->noise_reduction && s->encoding);
1698         update_noise_reduction(s);
1699     }
1700
1701     return 0;
1702 }
1703
1704 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1705                           const AVFrame *pic_arg, int *got_packet)
1706 {
1707     MpegEncContext *s = avctx->priv_data;
1708     int i, stuffing_count, ret;
1709     int context_count = s->slice_context_count;
1710
1711     s->picture_in_gop_number++;
1712
1713     if (load_input_picture(s, pic_arg) < 0)
1714         return -1;
1715
1716     if (select_input_picture(s) < 0) {
1717         return -1;
1718     }
1719
1720     /* output? */
1721     if (s->new_picture.f->data[0]) {
1722         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1723         int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - FF_INPUT_BUFFER_PADDING_SIZE
1724                                               :
1725                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1726         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
1727             return ret;
1728         if (s->mb_info) {
1729             s->mb_info_ptr = av_packet_new_side_data(pkt,
1730                                  AV_PKT_DATA_H263_MB_INFO,
1731                                  s->mb_width*s->mb_height*12);
1732             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1733         }
1734
1735         for (i = 0; i < context_count; i++) {
1736             int start_y = s->thread_context[i]->start_mb_y;
1737             int   end_y = s->thread_context[i]->  end_mb_y;
1738             int h       = s->mb_height;
1739             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1740             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1741
1742             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1743         }
1744
1745         s->pict_type = s->new_picture.f->pict_type;
1746         //emms_c();
1747         ret = frame_start(s);
1748         if (ret < 0)
1749             return ret;
1750 vbv_retry:
1751         ret = encode_picture(s, s->picture_number);
1752         if (growing_buffer) {
1753             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1754             pkt->data = s->pb.buf;
1755             pkt->size = avctx->internal->byte_buffer_size;
1756         }
1757         if (ret < 0)
1758             return -1;
1759
1760         avctx->header_bits = s->header_bits;
1761         avctx->mv_bits     = s->mv_bits;
1762         avctx->misc_bits   = s->misc_bits;
1763         avctx->i_tex_bits  = s->i_tex_bits;
1764         avctx->p_tex_bits  = s->p_tex_bits;
1765         avctx->i_count     = s->i_count;
1766         // FIXME f/b_count in avctx
1767         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1768         avctx->skip_count  = s->skip_count;
1769
1770         frame_end(s);
1771
1772         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1773             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1774
1775         if (avctx->rc_buffer_size) {
1776             RateControlContext *rcc = &s->rc_context;
1777             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1778
1779             if (put_bits_count(&s->pb) > max_size &&
1780                 s->lambda < s->lmax) {
1781                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1782                                        (s->qscale + 1) / s->qscale);
1783                 if (s->adaptive_quant) {
1784                     int i;
1785                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1786                         s->lambda_table[i] =
1787                             FFMAX(s->lambda_table[i] + 1,
1788                                   s->lambda_table[i] * (s->qscale + 1) /
1789                                   s->qscale);
1790                 }
1791                 s->mb_skipped = 0;        // done in frame_start()
1792                 // done in encode_picture() so we must undo it
1793                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1794                     if (s->flipflop_rounding          ||
1795                         s->codec_id == AV_CODEC_ID_H263P ||
1796                         s->codec_id == AV_CODEC_ID_MPEG4)
1797                         s->no_rounding ^= 1;
1798                 }
1799                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1800                     s->time_base       = s->last_time_base;
1801                     s->last_non_b_time = s->time - s->pp_time;
1802                 }
1803                 for (i = 0; i < context_count; i++) {
1804                     PutBitContext *pb = &s->thread_context[i]->pb;
1805                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1806                 }
1807                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1808                 goto vbv_retry;
1809             }
1810
1811             av_assert0(s->avctx->rc_max_rate);
1812         }
1813
1814         if (s->flags & CODEC_FLAG_PASS1)
1815             ff_write_pass1_stats(s);
1816
1817         for (i = 0; i < 4; i++) {
1818             s->current_picture_ptr->f->error[i] =
1819             s->current_picture.f->error[i] =
1820                 s->current_picture.error[i];
1821             avctx->error[i] += s->current_picture_ptr->f->error[i];
1822         }
1823
1824         if (s->flags & CODEC_FLAG_PASS1)
1825             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1826                    avctx->i_tex_bits + avctx->p_tex_bits ==
1827                        put_bits_count(&s->pb));
1828         flush_put_bits(&s->pb);
1829         s->frame_bits  = put_bits_count(&s->pb);
1830
1831         stuffing_count = ff_vbv_update(s, s->frame_bits);
1832         s->stuffing_bits = 8*stuffing_count;
1833         if (stuffing_count) {
1834             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1835                     stuffing_count + 50) {
1836                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1837                 return -1;
1838             }
1839
1840             switch (s->codec_id) {
1841             case AV_CODEC_ID_MPEG1VIDEO:
1842             case AV_CODEC_ID_MPEG2VIDEO:
1843                 while (stuffing_count--) {
1844                     put_bits(&s->pb, 8, 0);
1845                 }
1846             break;
1847             case AV_CODEC_ID_MPEG4:
1848                 put_bits(&s->pb, 16, 0);
1849                 put_bits(&s->pb, 16, 0x1C3);
1850                 stuffing_count -= 4;
1851                 while (stuffing_count--) {
1852                     put_bits(&s->pb, 8, 0xFF);
1853                 }
1854             break;
1855             default:
1856                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1857             }
1858             flush_put_bits(&s->pb);
1859             s->frame_bits  = put_bits_count(&s->pb);
1860         }
1861
1862         /* update mpeg1/2 vbv_delay for CBR */
1863         if (s->avctx->rc_max_rate                          &&
1864             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1865             s->out_format == FMT_MPEG1                     &&
1866             90000LL * (avctx->rc_buffer_size - 1) <=
1867                 s->avctx->rc_max_rate * 0xFFFFLL) {
1868             int vbv_delay, min_delay;
1869             double inbits  = s->avctx->rc_max_rate *
1870                              av_q2d(s->avctx->time_base);
1871             int    minbits = s->frame_bits - 8 *
1872                              (s->vbv_delay_ptr - s->pb.buf - 1);
1873             double bits    = s->rc_context.buffer_index + minbits - inbits;
1874
1875             if (bits < 0)
1876                 av_log(s->avctx, AV_LOG_ERROR,
1877                        "Internal error, negative bits\n");
1878
1879             assert(s->repeat_first_field == 0);
1880
1881             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1882             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1883                         s->avctx->rc_max_rate;
1884
1885             vbv_delay = FFMAX(vbv_delay, min_delay);
1886
1887             av_assert0(vbv_delay < 0xFFFF);
1888
1889             s->vbv_delay_ptr[0] &= 0xF8;
1890             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1891             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1892             s->vbv_delay_ptr[2] &= 0x07;
1893             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1894             avctx->vbv_delay     = vbv_delay * 300;
1895         }
1896         s->total_bits     += s->frame_bits;
1897         avctx->frame_bits  = s->frame_bits;
1898
1899         pkt->pts = s->current_picture.f->pts;
1900         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1901             if (!s->current_picture.f->coded_picture_number)
1902                 pkt->dts = pkt->pts - s->dts_delta;
1903             else
1904                 pkt->dts = s->reordered_pts;
1905             s->reordered_pts = pkt->pts;
1906         } else
1907             pkt->dts = pkt->pts;
1908         if (s->current_picture.f->key_frame)
1909             pkt->flags |= AV_PKT_FLAG_KEY;
1910         if (s->mb_info)
1911             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1912     } else {
1913         s->frame_bits = 0;
1914     }
1915
1916     /* release non-reference frames */
1917     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1918         if (!s->picture[i].reference)
1919             ff_mpeg_unref_picture(s, &s->picture[i]);
1920     }
1921
1922     av_assert1((s->frame_bits & 7) == 0);
1923
1924     pkt->size = s->frame_bits / 8;
1925     *got_packet = !!pkt->size;
1926     return 0;
1927 }
1928
1929 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1930                                                 int n, int threshold)
1931 {
1932     static const char tab[64] = {
1933         3, 2, 2, 1, 1, 1, 1, 1,
1934         1, 1, 1, 1, 1, 1, 1, 1,
1935         1, 1, 1, 1, 1, 1, 1, 1,
1936         0, 0, 0, 0, 0, 0, 0, 0,
1937         0, 0, 0, 0, 0, 0, 0, 0,
1938         0, 0, 0, 0, 0, 0, 0, 0,
1939         0, 0, 0, 0, 0, 0, 0, 0,
1940         0, 0, 0, 0, 0, 0, 0, 0
1941     };
1942     int score = 0;
1943     int run = 0;
1944     int i;
1945     int16_t *block = s->block[n];
1946     const int last_index = s->block_last_index[n];
1947     int skip_dc;
1948
1949     if (threshold < 0) {
1950         skip_dc = 0;
1951         threshold = -threshold;
1952     } else
1953         skip_dc = 1;
1954
1955     /* Are all we could set to zero already zero? */
1956     if (last_index <= skip_dc - 1)
1957         return;
1958
1959     for (i = 0; i <= last_index; i++) {
1960         const int j = s->intra_scantable.permutated[i];
1961         const int level = FFABS(block[j]);
1962         if (level == 1) {
1963             if (skip_dc && i == 0)
1964                 continue;
1965             score += tab[run];
1966             run = 0;
1967         } else if (level > 1) {
1968             return;
1969         } else {
1970             run++;
1971         }
1972     }
1973     if (score >= threshold)
1974         return;
1975     for (i = skip_dc; i <= last_index; i++) {
1976         const int j = s->intra_scantable.permutated[i];
1977         block[j] = 0;
1978     }
1979     if (block[0])
1980         s->block_last_index[n] = 0;
1981     else
1982         s->block_last_index[n] = -1;
1983 }
1984
1985 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1986                                int last_index)
1987 {
1988     int i;
1989     const int maxlevel = s->max_qcoeff;
1990     const int minlevel = s->min_qcoeff;
1991     int overflow = 0;
1992
1993     if (s->mb_intra) {
1994         i = 1; // skip clipping of intra dc
1995     } else
1996         i = 0;
1997
1998     for (; i <= last_index; i++) {
1999         const int j = s->intra_scantable.permutated[i];
2000         int level = block[j];
2001
2002         if (level > maxlevel) {
2003             level = maxlevel;
2004             overflow++;
2005         } else if (level < minlevel) {
2006             level = minlevel;
2007             overflow++;
2008         }
2009
2010         block[j] = level;
2011     }
2012
2013     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2014         av_log(s->avctx, AV_LOG_INFO,
2015                "warning, clipping %d dct coefficients to %d..%d\n",
2016                overflow, minlevel, maxlevel);
2017 }
2018
/**
 * Compute a weight for each pixel of an 8x8 block from its local activity.
 *
 * For every pixel the sum and sum of squares over its neighbourhood (up to
 * 3x3, cropped at the borders of the 8x8 block) are gathered; the weight is
 * 36 * sqrt(count * sqr - sum * sum) / count.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total    = 0;
            int total_sq = 0;
            int samples  = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int pix = ptr[nx + ny * stride];

                    total    += pix;
                    total_sq += pix * pix;
                    samples++;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * total_sq - total * total)) / samples;
        }
    }
}
2042
2043 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2044                                                 int motion_x, int motion_y,
2045                                                 int mb_block_height,
2046                                                 int mb_block_width,
2047                                                 int mb_block_count)
2048 {
2049     int16_t weight[12][64];
2050     int16_t orig[12][64];
2051     const int mb_x = s->mb_x;
2052     const int mb_y = s->mb_y;
2053     int i;
2054     int skip_dct[12];
2055     int dct_offset = s->linesize * 8; // default for progressive frames
2056     int uv_dct_offset = s->uvlinesize * 8;
2057     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2058     ptrdiff_t wrap_y, wrap_c;
2059
2060     for (i = 0; i < mb_block_count; i++)
2061         skip_dct[i] = s->skipdct;
2062
2063     if (s->adaptive_quant) {
2064         const int last_qp = s->qscale;
2065         const int mb_xy = mb_x + mb_y * s->mb_stride;
2066
2067         s->lambda = s->lambda_table[mb_xy];
2068         update_qscale(s);
2069
2070         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2071             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2072             s->dquant = s->qscale - last_qp;
2073
2074             if (s->out_format == FMT_H263) {
2075                 s->dquant = av_clip(s->dquant, -2, 2);
2076
2077                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2078                     if (!s->mb_intra) {
2079                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2080                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2081                                 s->dquant = 0;
2082                         }
2083                         if (s->mv_type == MV_TYPE_8X8)
2084                             s->dquant = 0;
2085                     }
2086                 }
2087             }
2088         }
2089         ff_set_qscale(s, last_qp + s->dquant);
2090     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2091         ff_set_qscale(s, s->qscale + s->dquant);
2092
2093     wrap_y = s->linesize;
2094     wrap_c = s->uvlinesize;
2095     ptr_y  = s->new_picture.f->data[0] +
2096              (mb_y * 16 * wrap_y)              + mb_x * 16;
2097     ptr_cb = s->new_picture.f->data[1] +
2098              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2099     ptr_cr = s->new_picture.f->data[2] +
2100              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2101
2102     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2103         uint8_t *ebuf = s->edge_emu_buffer + 36 * wrap_y;
2104         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2105         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2106         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2107                                  wrap_y, wrap_y,
2108                                  16, 16, mb_x * 16, mb_y * 16,
2109                                  s->width, s->height);
2110         ptr_y = ebuf;
2111         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2112                                  wrap_c, wrap_c,
2113                                  mb_block_width, mb_block_height,
2114                                  mb_x * mb_block_width, mb_y * mb_block_height,
2115                                  cw, ch);
2116         ptr_cb = ebuf + 16 * wrap_y;
2117         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2118                                  wrap_c, wrap_c,
2119                                  mb_block_width, mb_block_height,
2120                                  mb_x * mb_block_width, mb_y * mb_block_height,
2121                                  cw, ch);
2122         ptr_cr = ebuf + 16 * wrap_y + 16;
2123     }
2124
2125     if (s->mb_intra) {
2126         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2127             int progressive_score, interlaced_score;
2128
2129             s->interlaced_dct = 0;
2130             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2131                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2132                                                      NULL, wrap_y, 8) - 400;
2133
2134             if (progressive_score > 0) {
2135                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2136                                                         NULL, wrap_y * 2, 8) +
2137                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2138                                                         NULL, wrap_y * 2, 8);
2139                 if (progressive_score > interlaced_score) {
2140                     s->interlaced_dct = 1;
2141
2142                     dct_offset = wrap_y;
2143                     uv_dct_offset = wrap_c;
2144                     wrap_y <<= 1;
2145                     if (s->chroma_format == CHROMA_422 ||
2146                         s->chroma_format == CHROMA_444)
2147                         wrap_c <<= 1;
2148                 }
2149             }
2150         }
2151
2152         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2153         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2154         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2155         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2156
2157         if (s->flags & CODEC_FLAG_GRAY) {
2158             skip_dct[4] = 1;
2159             skip_dct[5] = 1;
2160         } else {
2161             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2162             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2163             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2164                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2165                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2166             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2167                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2168                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2169                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2170                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2171                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2172                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2173             }
2174         }
2175     } else {
2176         op_pixels_func (*op_pix)[4];
2177         qpel_mc_func (*op_qpix)[16];
2178         uint8_t *dest_y, *dest_cb, *dest_cr;
2179
2180         dest_y  = s->dest[0];
2181         dest_cb = s->dest[1];
2182         dest_cr = s->dest[2];
2183
2184         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2185             op_pix  = s->hdsp.put_pixels_tab;
2186             op_qpix = s->qdsp.put_qpel_pixels_tab;
2187         } else {
2188             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2189             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2190         }
2191
2192         if (s->mv_dir & MV_DIR_FORWARD) {
2193             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2194                           s->last_picture.f->data,
2195                           op_pix, op_qpix);
2196             op_pix  = s->hdsp.avg_pixels_tab;
2197             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2198         }
2199         if (s->mv_dir & MV_DIR_BACKWARD) {
2200             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2201                           s->next_picture.f->data,
2202                           op_pix, op_qpix);
2203         }
2204
2205         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2206             int progressive_score, interlaced_score;
2207
2208             s->interlaced_dct = 0;
2209             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2210                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2211                                                      ptr_y + wrap_y * 8,
2212                                                      wrap_y, 8) - 400;
2213
2214             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2215                 progressive_score -= 400;
2216
2217             if (progressive_score > 0) {
2218                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2219                                                         wrap_y * 2, 8) +
2220                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2221                                                         ptr_y + wrap_y,
2222                                                         wrap_y * 2, 8);
2223
2224                 if (progressive_score > interlaced_score) {
2225                     s->interlaced_dct = 1;
2226
2227                     dct_offset = wrap_y;
2228                     uv_dct_offset = wrap_c;
2229                     wrap_y <<= 1;
2230                     if (s->chroma_format == CHROMA_422)
2231                         wrap_c <<= 1;
2232                 }
2233             }
2234         }
2235
2236         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2237         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2238         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2239                             dest_y + dct_offset, wrap_y);
2240         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2241                             dest_y + dct_offset + 8, wrap_y);
2242
2243         if (s->flags & CODEC_FLAG_GRAY) {
2244             skip_dct[4] = 1;
2245             skip_dct[5] = 1;
2246         } else {
2247             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2248             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2249             if (!s->chroma_y_shift) { /* 422 */
2250                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2251                                     dest_cb + uv_dct_offset, wrap_c);
2252                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2253                                     dest_cr + uv_dct_offset, wrap_c);
2254             }
2255         }
2256         /* pre quantization */
2257         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2258                 2 * s->qscale * s->qscale) {
2259             // FIXME optimize
2260             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2261                 skip_dct[0] = 1;
2262             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2263                 skip_dct[1] = 1;
2264             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2265                                wrap_y, 8) < 20 * s->qscale)
2266                 skip_dct[2] = 1;
2267             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2268                                wrap_y, 8) < 20 * s->qscale)
2269                 skip_dct[3] = 1;
2270             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2271                 skip_dct[4] = 1;
2272             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2273                 skip_dct[5] = 1;
2274             if (!s->chroma_y_shift) { /* 422 */
2275                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2276                                    dest_cb + uv_dct_offset,
2277                                    wrap_c, 8) < 20 * s->qscale)
2278                     skip_dct[6] = 1;
2279                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2280                                    dest_cr + uv_dct_offset,
2281                                    wrap_c, 8) < 20 * s->qscale)
2282                     skip_dct[7] = 1;
2283             }
2284         }
2285     }
2286
2287     if (s->quantizer_noise_shaping) {
2288         if (!skip_dct[0])
2289             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2290         if (!skip_dct[1])
2291             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2292         if (!skip_dct[2])
2293             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2294         if (!skip_dct[3])
2295             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2296         if (!skip_dct[4])
2297             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2298         if (!skip_dct[5])
2299             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2300         if (!s->chroma_y_shift) { /* 422 */
2301             if (!skip_dct[6])
2302                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2303                                   wrap_c);
2304             if (!skip_dct[7])
2305                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2306                                   wrap_c);
2307         }
2308         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2309     }
2310
2311     /* DCT & quantize */
2312     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2313     {
2314         for (i = 0; i < mb_block_count; i++) {
2315             if (!skip_dct[i]) {
2316                 int overflow;
2317                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2318                 // FIXME we could decide to change to quantizer instead of
2319                 // clipping
2320                 // JS: I don't think that would be a good idea it could lower
2321                 //     quality instead of improve it. Just INTRADC clipping
2322                 //     deserves changes in quantizer
2323                 if (overflow)
2324                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2325             } else
2326                 s->block_last_index[i] = -1;
2327         }
2328         if (s->quantizer_noise_shaping) {
2329             for (i = 0; i < mb_block_count; i++) {
2330                 if (!skip_dct[i]) {
2331                     s->block_last_index[i] =
2332                         dct_quantize_refine(s, s->block[i], weight[i],
2333                                             orig[i], i, s->qscale);
2334                 }
2335             }
2336         }
2337
2338         if (s->luma_elim_threshold && !s->mb_intra)
2339             for (i = 0; i < 4; i++)
2340                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2341         if (s->chroma_elim_threshold && !s->mb_intra)
2342             for (i = 4; i < mb_block_count; i++)
2343                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2344
2345         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2346             for (i = 0; i < mb_block_count; i++) {
2347                 if (s->block_last_index[i] == -1)
2348                     s->coded_score[i] = INT_MAX / 256;
2349             }
2350         }
2351     }
2352
2353     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2354         s->block_last_index[4] =
2355         s->block_last_index[5] = 0;
2356         s->block[4][0] =
2357         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2358         if (!s->chroma_y_shift) { /* 422 / 444 */
2359             for (i=6; i<12; i++) {
2360                 s->block_last_index[i] = 0;
2361                 s->block[i][0] = s->block[4][0];
2362             }
2363         }
2364     }
2365
2366     // non c quantize code returns incorrect block_last_index FIXME
2367     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2368         for (i = 0; i < mb_block_count; i++) {
2369             int j;
2370             if (s->block_last_index[i] > 0) {
2371                 for (j = 63; j > 0; j--) {
2372                     if (s->block[i][s->intra_scantable.permutated[j]])
2373                         break;
2374                 }
2375                 s->block_last_index[i] = j;
2376             }
2377         }
2378     }
2379
2380     /* huffman encode */
2381     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2382     case AV_CODEC_ID_MPEG1VIDEO:
2383     case AV_CODEC_ID_MPEG2VIDEO:
2384         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2385             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2386         break;
2387     case AV_CODEC_ID_MPEG4:
2388         if (CONFIG_MPEG4_ENCODER)
2389             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2390         break;
2391     case AV_CODEC_ID_MSMPEG4V2:
2392     case AV_CODEC_ID_MSMPEG4V3:
2393     case AV_CODEC_ID_WMV1:
2394         if (CONFIG_MSMPEG4_ENCODER)
2395             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2396         break;
2397     case AV_CODEC_ID_WMV2:
2398         if (CONFIG_WMV2_ENCODER)
2399             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2400         break;
2401     case AV_CODEC_ID_H261:
2402         if (CONFIG_H261_ENCODER)
2403             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2404         break;
2405     case AV_CODEC_ID_H263:
2406     case AV_CODEC_ID_H263P:
2407     case AV_CODEC_ID_FLV1:
2408     case AV_CODEC_ID_RV10:
2409     case AV_CODEC_ID_RV20:
2410         if (CONFIG_H263_ENCODER)
2411             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2412         break;
2413     case AV_CODEC_ID_MJPEG:
2414     case AV_CODEC_ID_AMV:
2415         if (CONFIG_MJPEG_ENCODER)
2416             ff_mjpeg_encode_mb(s, s->block);
2417         break;
2418     default:
2419         av_assert1(0);
2420     }
2421 }
2422
2423 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2424 {
2425     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2426     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2427     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2428 }
2429
2430 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2431     int i;
2432
2433     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2434
2435     /* mpeg1 */
2436     d->mb_skip_run= s->mb_skip_run;
2437     for(i=0; i<3; i++)
2438         d->last_dc[i] = s->last_dc[i];
2439
2440     /* statistics */
2441     d->mv_bits= s->mv_bits;
2442     d->i_tex_bits= s->i_tex_bits;
2443     d->p_tex_bits= s->p_tex_bits;
2444     d->i_count= s->i_count;
2445     d->f_count= s->f_count;
2446     d->b_count= s->b_count;
2447     d->skip_count= s->skip_count;
2448     d->misc_bits= s->misc_bits;
2449     d->last_bits= 0;
2450
2451     d->mb_skipped= 0;
2452     d->qscale= s->qscale;
2453     d->dquant= s->dquant;
2454
2455     d->esc3_level_length= s->esc3_level_length;
2456 }
2457
2458 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2459     int i;
2460
2461     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2462     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2463
2464     /* mpeg1 */
2465     d->mb_skip_run= s->mb_skip_run;
2466     for(i=0; i<3; i++)
2467         d->last_dc[i] = s->last_dc[i];
2468
2469     /* statistics */
2470     d->mv_bits= s->mv_bits;
2471     d->i_tex_bits= s->i_tex_bits;
2472     d->p_tex_bits= s->p_tex_bits;
2473     d->i_count= s->i_count;
2474     d->f_count= s->f_count;
2475     d->b_count= s->b_count;
2476     d->skip_count= s->skip_count;
2477     d->misc_bits= s->misc_bits;
2478
2479     d->mb_intra= s->mb_intra;
2480     d->mb_skipped= s->mb_skipped;
2481     d->mv_type= s->mv_type;
2482     d->mv_dir= s->mv_dir;
2483     d->pb= s->pb;
2484     if(s->data_partitioning){
2485         d->pb2= s->pb2;
2486         d->tex_pb= s->tex_pb;
2487     }
2488     d->block= s->block;
2489     for(i=0; i<8; i++)
2490         d->block_last_index[i]= s->block_last_index[i];
2491     d->interlaced_dct= s->interlaced_dct;
2492     d->qscale= s->qscale;
2493
2494     d->esc3_level_length= s->esc3_level_length;
2495 }
2496
2497 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2498                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2499                            int *dmin, int *next_block, int motion_x, int motion_y)
2500 {
2501     int score;
2502     uint8_t *dest_backup[3];
2503
2504     copy_context_before_encode(s, backup, type);
2505
2506     s->block= s->blocks[*next_block];
2507     s->pb= pb[*next_block];
2508     if(s->data_partitioning){
2509         s->pb2   = pb2   [*next_block];
2510         s->tex_pb= tex_pb[*next_block];
2511     }
2512
2513     if(*next_block){
2514         memcpy(dest_backup, s->dest, sizeof(s->dest));
2515         s->dest[0] = s->rd_scratchpad;
2516         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2517         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2518         av_assert0(s->linesize >= 32); //FIXME
2519     }
2520
2521     encode_mb(s, motion_x, motion_y);
2522
2523     score= put_bits_count(&s->pb);
2524     if(s->data_partitioning){
2525         score+= put_bits_count(&s->pb2);
2526         score+= put_bits_count(&s->tex_pb);
2527     }
2528
2529     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2530         ff_mpv_decode_mb(s, s->block);
2531
2532         score *= s->lambda2;
2533         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2534     }
2535
2536     if(*next_block){
2537         memcpy(s->dest, dest_backup, sizeof(s->dest));
2538     }
2539
2540     if(score<*dmin){
2541         *dmin= score;
2542         *next_block^=1;
2543
2544         copy_context_after_encode(best, s, type);
2545     }
2546 }
2547
2548 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2549     uint32_t *sq = ff_square_tab + 256;
2550     int acc=0;
2551     int x,y;
2552
2553     if(w==16 && h==16)
2554         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2555     else if(w==8 && h==8)
2556         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2557
2558     for(y=0; y<h; y++){
2559         for(x=0; x<w; x++){
2560             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2561         }
2562     }
2563
2564     av_assert2(acc>=0);
2565
2566     return acc;
2567 }
2568
2569 static int sse_mb(MpegEncContext *s){
2570     int w= 16;
2571     int h= 16;
2572
2573     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2574     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2575
2576     if(w==16 && h==16)
2577       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2578         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2579                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2580                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2581       }else{
2582         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2583                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2584                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2585       }
2586     else
2587         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2588                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2589                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2590 }
2591
2592 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2593     MpegEncContext *s= *(void**)arg;
2594
2595
2596     s->me.pre_pass=1;
2597     s->me.dia_size= s->avctx->pre_dia_size;
2598     s->first_slice_line=1;
2599     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2600         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2601             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2602         }
2603         s->first_slice_line=0;
2604     }
2605
2606     s->me.pre_pass=0;
2607
2608     return 0;
2609 }
2610
2611 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2612     MpegEncContext *s= *(void**)arg;
2613
2614     ff_check_alignment();
2615
2616     s->me.dia_size= s->avctx->dia_size;
2617     s->first_slice_line=1;
2618     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2619         s->mb_x=0; //for block init below
2620         ff_init_block_index(s);
2621         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2622             s->block_index[0]+=2;
2623             s->block_index[1]+=2;
2624             s->block_index[2]+=2;
2625             s->block_index[3]+=2;
2626
2627             /* compute motion vector & mb_type and store in context */
2628             if(s->pict_type==AV_PICTURE_TYPE_B)
2629                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2630             else
2631                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2632         }
2633         s->first_slice_line=0;
2634     }
2635     return 0;
2636 }
2637
2638 static int mb_var_thread(AVCodecContext *c, void *arg){
2639     MpegEncContext *s= *(void**)arg;
2640     int mb_x, mb_y;
2641
2642     ff_check_alignment();
2643
2644     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2645         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2646             int xx = mb_x * 16;
2647             int yy = mb_y * 16;
2648             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2649             int varc;
2650             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2651
2652             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2653                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2654
2655             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2656             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2657             s->me.mb_var_sum_temp    += varc;
2658         }
2659     }
2660     return 0;
2661 }
2662
2663 static void write_slice_end(MpegEncContext *s){
2664     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2665         if(s->partitioned_frame){
2666             ff_mpeg4_merge_partitions(s);
2667         }
2668
2669         ff_mpeg4_stuffing(&s->pb);
2670     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2671         ff_mjpeg_encode_stuffing(s);
2672     }
2673
2674     avpriv_align_put_bits(&s->pb);
2675     flush_put_bits(&s->pb);
2676
2677     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2678         s->misc_bits+= get_bits_diff(s);
2679 }
2680
2681 static void write_mb_info(MpegEncContext *s)
2682 {
2683     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2684     int offset = put_bits_count(&s->pb);
2685     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2686     int gobn = s->mb_y / s->gob_index;
2687     int pred_x, pred_y;
2688     if (CONFIG_H263_ENCODER)
2689         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2690     bytestream_put_le32(&ptr, offset);
2691     bytestream_put_byte(&ptr, s->qscale);
2692     bytestream_put_byte(&ptr, gobn);
2693     bytestream_put_le16(&ptr, mba);
2694     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2695     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2696     /* 4MV not implemented */
2697     bytestream_put_byte(&ptr, 0); /* hmv2 */
2698     bytestream_put_byte(&ptr, 0); /* vmv2 */
2699 }
2700
2701 static void update_mb_info(MpegEncContext *s, int startcode)
2702 {
2703     if (!s->mb_info)
2704         return;
2705     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2706         s->mb_info_size += 12;
2707         s->prev_mb_info = s->last_mb_info;
2708     }
2709     if (startcode) {
2710         s->prev_mb_info = put_bits_count(&s->pb)/8;
2711         /* This might have incremented mb_info_size above, and we return without
2712          * actually writing any info into that slot yet. But in that case,
2713          * this will be called again at the start of the after writing the
2714          * start code, actually writing the mb info. */
2715         return;
2716     }
2717
2718     s->last_mb_info = put_bits_count(&s->pb)/8;
2719     if (!s->mb_info_size)
2720         s->mb_info_size += 12;
2721     write_mb_info(s);
2722 }
2723
2724 static int encode_thread(AVCodecContext *c, void *arg){
2725     MpegEncContext *s= *(void**)arg;
2726     int mb_x, mb_y, pdif = 0;
2727     int chr_h= 16>>s->chroma_y_shift;
2728     int i, j;
2729     MpegEncContext best_s = { 0 }, backup_s;
2730     uint8_t bit_buf[2][MAX_MB_BYTES];
2731     uint8_t bit_buf2[2][MAX_MB_BYTES];
2732     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2733     PutBitContext pb[2], pb2[2], tex_pb[2];
2734
2735     ff_check_alignment();
2736
2737     for(i=0; i<2; i++){
2738         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2739         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2740         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2741     }
2742
2743     s->last_bits= put_bits_count(&s->pb);
2744     s->mv_bits=0;
2745     s->misc_bits=0;
2746     s->i_tex_bits=0;
2747     s->p_tex_bits=0;
2748     s->i_count=0;
2749     s->f_count=0;
2750     s->b_count=0;
2751     s->skip_count=0;
2752
2753     for(i=0; i<3; i++){
2754         /* init last dc values */
2755         /* note: quant matrix value (8) is implied here */
2756         s->last_dc[i] = 128 << s->intra_dc_precision;
2757
2758         s->current_picture.error[i] = 0;
2759     }
2760     if(s->codec_id==AV_CODEC_ID_AMV){
2761         s->last_dc[0] = 128*8/13;
2762         s->last_dc[1] = 128*8/14;
2763         s->last_dc[2] = 128*8/14;
2764     }
2765     s->mb_skip_run = 0;
2766     memset(s->last_mv, 0, sizeof(s->last_mv));
2767
2768     s->last_mv_dir = 0;
2769
2770     switch(s->codec_id){
2771     case AV_CODEC_ID_H263:
2772     case AV_CODEC_ID_H263P:
2773     case AV_CODEC_ID_FLV1:
2774         if (CONFIG_H263_ENCODER)
2775             s->gob_index = ff_h263_get_gob_height(s);
2776         break;
2777     case AV_CODEC_ID_MPEG4:
2778         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2779             ff_mpeg4_init_partitions(s);
2780         break;
2781     }
2782
2783     s->resync_mb_x=0;
2784     s->resync_mb_y=0;
2785     s->first_slice_line = 1;
2786     s->ptr_lastgob = s->pb.buf;
2787     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2788         s->mb_x=0;
2789         s->mb_y= mb_y;
2790
2791         ff_set_qscale(s, s->qscale);
2792         ff_init_block_index(s);
2793
2794         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2795             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2796             int mb_type= s->mb_type[xy];
2797 //            int d;
2798             int dmin= INT_MAX;
2799             int dir;
2800
2801             if (   s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES
2802                 && s->slice_context_count == 1
2803                 && s->pb.buf == s->avctx->internal->byte_buffer) {
2804                 int new_size =  s->avctx->internal->byte_buffer_size
2805                               + s->avctx->internal->byte_buffer_size/4
2806                               + s->mb_width*MAX_MB_BYTES;
2807                 int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2808                 int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2809
2810                 uint8_t *new_buffer = NULL;
2811                 int new_buffer_size = 0;
2812
2813                 av_fast_padded_malloc(&new_buffer, &new_buffer_size, new_size);
2814                 if (new_buffer) {
2815                     memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2816                     av_free(s->avctx->internal->byte_buffer);
2817                     s->avctx->internal->byte_buffer      = new_buffer;
2818                     s->avctx->internal->byte_buffer_size = new_buffer_size;
2819                     rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2820                     s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2821                     s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2822                 }
2823             }
2824             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2825                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2826                 return -1;
2827             }
2828             if(s->data_partitioning){
2829                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2830                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2831                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2832                     return -1;
2833                 }
2834             }
2835
2836             s->mb_x = mb_x;
2837             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2838             ff_update_block_index(s);
2839
2840             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2841                 ff_h261_reorder_mb_index(s);
2842                 xy= s->mb_y*s->mb_stride + s->mb_x;
2843                 mb_type= s->mb_type[xy];
2844             }
2845
2846             /* write gob / video packet header  */
2847             if(s->rtp_mode){
2848                 int current_packet_size, is_gob_start;
2849
2850                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2851
2852                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2853
2854                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2855
2856                 switch(s->codec_id){
2857                 case AV_CODEC_ID_H263:
2858                 case AV_CODEC_ID_H263P:
2859                     if(!s->h263_slice_structured)
2860                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2861                     break;
2862                 case AV_CODEC_ID_MPEG2VIDEO:
2863                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2864                 case AV_CODEC_ID_MPEG1VIDEO:
2865                     if(s->mb_skip_run) is_gob_start=0;
2866                     break;
2867                 case AV_CODEC_ID_MJPEG:
2868                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2869                     break;
2870                 }
2871
2872                 if(is_gob_start){
2873                     if(s->start_mb_y != mb_y || mb_x!=0){
2874                         write_slice_end(s);
2875
2876                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2877                             ff_mpeg4_init_partitions(s);
2878                         }
2879                     }
2880
2881                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2882                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2883
2884                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2885                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2886                         int d = 100 / s->error_rate;
2887                         if(r % d == 0){
2888                             current_packet_size=0;
2889                             s->pb.buf_ptr= s->ptr_lastgob;
2890                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2891                         }
2892                     }
2893
2894                     if (s->avctx->rtp_callback){
2895                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2896                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2897                     }
2898                     update_mb_info(s, 1);
2899
2900                     switch(s->codec_id){
2901                     case AV_CODEC_ID_MPEG4:
2902                         if (CONFIG_MPEG4_ENCODER) {
2903                             ff_mpeg4_encode_video_packet_header(s);
2904                             ff_mpeg4_clean_buffers(s);
2905                         }
2906                     break;
2907                     case AV_CODEC_ID_MPEG1VIDEO:
2908                     case AV_CODEC_ID_MPEG2VIDEO:
2909                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2910                             ff_mpeg1_encode_slice_header(s);
2911                             ff_mpeg1_clean_buffers(s);
2912                         }
2913                     break;
2914                     case AV_CODEC_ID_H263:
2915                     case AV_CODEC_ID_H263P:
2916                         if (CONFIG_H263_ENCODER)
2917                             ff_h263_encode_gob_header(s, mb_y);
2918                     break;
2919                     }
2920
2921                     if(s->flags&CODEC_FLAG_PASS1){
2922                         int bits= put_bits_count(&s->pb);
2923                         s->misc_bits+= bits - s->last_bits;
2924                         s->last_bits= bits;
2925                     }
2926
2927                     s->ptr_lastgob += current_packet_size;
2928                     s->first_slice_line=1;
2929                     s->resync_mb_x=mb_x;
2930                     s->resync_mb_y=mb_y;
2931                 }
2932             }
2933
2934             if(  (s->resync_mb_x   == s->mb_x)
2935                && s->resync_mb_y+1 == s->mb_y){
2936                 s->first_slice_line=0;
2937             }
2938
2939             s->mb_skipped=0;
2940             s->dquant=0; //only for QP_RD
2941
2942             update_mb_info(s, 0);
2943
2944             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2945                 int next_block=0;
2946                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2947
2948                 copy_context_before_encode(&backup_s, s, -1);
2949                 backup_s.pb= s->pb;
2950                 best_s.data_partitioning= s->data_partitioning;
2951                 best_s.partitioned_frame= s->partitioned_frame;
2952                 if(s->data_partitioning){
2953                     backup_s.pb2= s->pb2;
2954                     backup_s.tex_pb= s->tex_pb;
2955                 }
2956
2957                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2958                     s->mv_dir = MV_DIR_FORWARD;
2959                     s->mv_type = MV_TYPE_16X16;
2960                     s->mb_intra= 0;
2961                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2962                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2963                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2964                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2965                 }
2966                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2967                     s->mv_dir = MV_DIR_FORWARD;
2968                     s->mv_type = MV_TYPE_FIELD;
2969                     s->mb_intra= 0;
2970                     for(i=0; i<2; i++){
2971                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2972                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2973                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2974                     }
2975                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2976                                  &dmin, &next_block, 0, 0);
2977                 }
2978                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2979                     s->mv_dir = MV_DIR_FORWARD;
2980                     s->mv_type = MV_TYPE_16X16;
2981                     s->mb_intra= 0;
2982                     s->mv[0][0][0] = 0;
2983                     s->mv[0][0][1] = 0;
2984                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2985                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2986                 }
2987                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2988                     s->mv_dir = MV_DIR_FORWARD;
2989                     s->mv_type = MV_TYPE_8X8;
2990                     s->mb_intra= 0;
2991                     for(i=0; i<4; i++){
2992                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2993                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2994                     }
2995                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2996                                  &dmin, &next_block, 0, 0);
2997                 }
2998                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2999                     s->mv_dir = MV_DIR_FORWARD;
3000                     s->mv_type = MV_TYPE_16X16;
3001                     s->mb_intra= 0;
3002                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3003                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3004                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3005                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3006                 }
3007                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3008                     s->mv_dir = MV_DIR_BACKWARD;
3009                     s->mv_type = MV_TYPE_16X16;
3010                     s->mb_intra= 0;
3011                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3012                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3013                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3014                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3015                 }
3016                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3017                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3018                     s->mv_type = MV_TYPE_16X16;
3019                     s->mb_intra= 0;
3020                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3021                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3022                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3023                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3024                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3025                                  &dmin, &next_block, 0, 0);
3026                 }
3027                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3028                     s->mv_dir = MV_DIR_FORWARD;
3029                     s->mv_type = MV_TYPE_FIELD;
3030                     s->mb_intra= 0;
3031                     for(i=0; i<2; i++){
3032                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3033                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3034                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3035                     }
3036                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3037                                  &dmin, &next_block, 0, 0);
3038                 }
3039                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3040                     s->mv_dir = MV_DIR_BACKWARD;
3041                     s->mv_type = MV_TYPE_FIELD;
3042                     s->mb_intra= 0;
3043                     for(i=0; i<2; i++){
3044                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3045                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3046                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3047                     }
3048                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3049                                  &dmin, &next_block, 0, 0);
3050                 }
3051                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3052                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3053                     s->mv_type = MV_TYPE_FIELD;
3054                     s->mb_intra= 0;
3055                     for(dir=0; dir<2; dir++){
3056                         for(i=0; i<2; i++){
3057                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3058                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3059                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3060                         }
3061                     }
3062                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3063                                  &dmin, &next_block, 0, 0);
3064                 }
3065                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3066                     s->mv_dir = 0;
3067                     s->mv_type = MV_TYPE_16X16;
3068                     s->mb_intra= 1;
3069                     s->mv[0][0][0] = 0;
3070                     s->mv[0][0][1] = 0;
3071                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3072                                  &dmin, &next_block, 0, 0);
3073                     if(s->h263_pred || s->h263_aic){
3074                         if(best_s.mb_intra)
3075                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3076                         else
3077                             ff_clean_intra_table_entries(s); //old mode?
3078                     }
3079                 }
3080
3081                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3082                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3083                         const int last_qp= backup_s.qscale;
3084                         int qpi, qp, dc[6];
3085                         int16_t ac[6][16];
3086                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3087                         static const int dquant_tab[4]={-1,1,-2,2};
3088                         int storecoefs = s->mb_intra && s->dc_val[0];
3089
3090                         av_assert2(backup_s.dquant == 0);
3091
3092                         //FIXME intra
3093                         s->mv_dir= best_s.mv_dir;
3094                         s->mv_type = MV_TYPE_16X16;
3095                         s->mb_intra= best_s.mb_intra;
3096                         s->mv[0][0][0] = best_s.mv[0][0][0];
3097                         s->mv[0][0][1] = best_s.mv[0][0][1];
3098                         s->mv[1][0][0] = best_s.mv[1][0][0];
3099                         s->mv[1][0][1] = best_s.mv[1][0][1];
3100
3101                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3102                         for(; qpi<4; qpi++){
3103                             int dquant= dquant_tab[qpi];
3104                             qp= last_qp + dquant;
3105                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3106                                 continue;
3107                             backup_s.dquant= dquant;
3108                             if(storecoefs){
3109                                 for(i=0; i<6; i++){
3110                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3111                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3112                                 }
3113                             }
3114
3115                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3116                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3117                             if(best_s.qscale != qp){
3118                                 if(storecoefs){
3119                                     for(i=0; i<6; i++){
3120                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3121                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3122                                     }
3123                                 }
3124                             }
3125                         }
3126                     }
3127                 }
3128                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3129                     int mx= s->b_direct_mv_table[xy][0];
3130                     int my= s->b_direct_mv_table[xy][1];
3131
3132                     backup_s.dquant = 0;
3133                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3134                     s->mb_intra= 0;
3135                     ff_mpeg4_set_direct_mv(s, mx, my);
3136                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3137                                  &dmin, &next_block, mx, my);
3138                 }
3139                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3140                     backup_s.dquant = 0;
3141                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3142                     s->mb_intra= 0;
3143                     ff_mpeg4_set_direct_mv(s, 0, 0);
3144                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3145                                  &dmin, &next_block, 0, 0);
3146                 }
3147                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3148                     int coded=0;
3149                     for(i=0; i<6; i++)
3150                         coded |= s->block_last_index[i];
3151                     if(coded){
3152                         int mx,my;
3153                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3154                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3155                             mx=my=0; //FIXME find the one we actually used
3156                             ff_mpeg4_set_direct_mv(s, mx, my);
3157                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3158                             mx= s->mv[1][0][0];
3159                             my= s->mv[1][0][1];
3160                         }else{
3161                             mx= s->mv[0][0][0];
3162                             my= s->mv[0][0][1];
3163                         }
3164
3165                         s->mv_dir= best_s.mv_dir;
3166                         s->mv_type = best_s.mv_type;
3167                         s->mb_intra= 0;
3168 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3169                         s->mv[0][0][1] = best_s.mv[0][0][1];
3170                         s->mv[1][0][0] = best_s.mv[1][0][0];
3171                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3172                         backup_s.dquant= 0;
3173                         s->skipdct=1;
3174                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3175                                         &dmin, &next_block, mx, my);
3176                         s->skipdct=0;
3177                     }
3178                 }
3179
3180                 s->current_picture.qscale_table[xy] = best_s.qscale;
3181
3182                 copy_context_after_encode(s, &best_s, -1);
3183
3184                 pb_bits_count= put_bits_count(&s->pb);
3185                 flush_put_bits(&s->pb);
3186                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3187                 s->pb= backup_s.pb;
3188
3189                 if(s->data_partitioning){
3190                     pb2_bits_count= put_bits_count(&s->pb2);
3191                     flush_put_bits(&s->pb2);
3192                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3193                     s->pb2= backup_s.pb2;
3194
3195                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3196                     flush_put_bits(&s->tex_pb);
3197                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3198                     s->tex_pb= backup_s.tex_pb;
3199                 }
3200                 s->last_bits= put_bits_count(&s->pb);
3201
3202                 if (CONFIG_H263_ENCODER &&
3203                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3204                     ff_h263_update_motion_val(s);
3205
3206                 if(next_block==0){ //FIXME 16 vs linesize16
3207                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3208                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3209                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3210                 }
3211
3212                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3213                     ff_mpv_decode_mb(s, s->block);
3214             } else {
3215                 int motion_x = 0, motion_y = 0;
3216                 s->mv_type=MV_TYPE_16X16;
3217                 // only one MB-Type possible
3218
3219                 switch(mb_type){
3220                 case CANDIDATE_MB_TYPE_INTRA:
3221                     s->mv_dir = 0;
3222                     s->mb_intra= 1;
3223                     motion_x= s->mv[0][0][0] = 0;
3224                     motion_y= s->mv[0][0][1] = 0;
3225                     break;
3226                 case CANDIDATE_MB_TYPE_INTER:
3227                     s->mv_dir = MV_DIR_FORWARD;
3228                     s->mb_intra= 0;
3229                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3230                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3231                     break;
3232                 case CANDIDATE_MB_TYPE_INTER_I:
3233                     s->mv_dir = MV_DIR_FORWARD;
3234                     s->mv_type = MV_TYPE_FIELD;
3235                     s->mb_intra= 0;
3236                     for(i=0; i<2; i++){
3237                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3238                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3239                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3240                     }
3241                     break;
3242                 case CANDIDATE_MB_TYPE_INTER4V:
3243                     s->mv_dir = MV_DIR_FORWARD;
3244                     s->mv_type = MV_TYPE_8X8;
3245                     s->mb_intra= 0;
3246                     for(i=0; i<4; i++){
3247                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3248                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3249                     }
3250                     break;
3251                 case CANDIDATE_MB_TYPE_DIRECT:
3252                     if (CONFIG_MPEG4_ENCODER) {
3253                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3254                         s->mb_intra= 0;
3255                         motion_x=s->b_direct_mv_table[xy][0];
3256                         motion_y=s->b_direct_mv_table[xy][1];
3257                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3258                     }
3259                     break;
3260                 case CANDIDATE_MB_TYPE_DIRECT0:
3261                     if (CONFIG_MPEG4_ENCODER) {
3262                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3263                         s->mb_intra= 0;
3264                         ff_mpeg4_set_direct_mv(s, 0, 0);
3265                     }
3266                     break;
3267                 case CANDIDATE_MB_TYPE_BIDIR:
3268                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3269                     s->mb_intra= 0;
3270                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3271                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3272                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3273                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3274                     break;
3275                 case CANDIDATE_MB_TYPE_BACKWARD:
3276                     s->mv_dir = MV_DIR_BACKWARD;
3277                     s->mb_intra= 0;
3278                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3279                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3280                     break;
3281                 case CANDIDATE_MB_TYPE_FORWARD:
3282                     s->mv_dir = MV_DIR_FORWARD;
3283                     s->mb_intra= 0;
3284                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3285                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3286                     break;
3287                 case CANDIDATE_MB_TYPE_FORWARD_I:
3288                     s->mv_dir = MV_DIR_FORWARD;
3289                     s->mv_type = MV_TYPE_FIELD;
3290                     s->mb_intra= 0;
3291                     for(i=0; i<2; i++){
3292                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3293                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3294                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3295                     }
3296                     break;
3297                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3298                     s->mv_dir = MV_DIR_BACKWARD;
3299                     s->mv_type = MV_TYPE_FIELD;
3300                     s->mb_intra= 0;
3301                     for(i=0; i<2; i++){
3302                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3303                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3304                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3305                     }
3306                     break;
3307                 case CANDIDATE_MB_TYPE_BIDIR_I:
3308                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3309                     s->mv_type = MV_TYPE_FIELD;
3310                     s->mb_intra= 0;
3311                     for(dir=0; dir<2; dir++){
3312                         for(i=0; i<2; i++){
3313                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3314                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3315                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3316                         }
3317                     }
3318                     break;
3319                 default:
3320                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3321                 }
3322
3323                 encode_mb(s, motion_x, motion_y);
3324
3325                 // RAL: Update last macroblock type
3326                 s->last_mv_dir = s->mv_dir;
3327
3328                 if (CONFIG_H263_ENCODER &&
3329                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3330                     ff_h263_update_motion_val(s);
3331
3332                 ff_mpv_decode_mb(s, s->block);
3333             }
3334
3335             /* clean the MV table in IPS frames for direct mode in B frames */
3336             if(s->mb_intra /* && I,P,S_TYPE */){
3337                 s->p_mv_table[xy][0]=0;
3338                 s->p_mv_table[xy][1]=0;
3339             }
3340
3341             if(s->flags&CODEC_FLAG_PSNR){
3342                 int w= 16;
3343                 int h= 16;
3344
3345                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3346                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3347
3348                 s->current_picture.error[0] += sse(
3349                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3350                     s->dest[0], w, h, s->linesize);
3351                 s->current_picture.error[1] += sse(
3352                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3353                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3354                 s->current_picture.error[2] += sse(
3355                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3356                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3357             }
3358             if(s->loop_filter){
3359                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3360                     ff_h263_loop_filter(s);
3361             }
3362             av_dlog(s->avctx, "MB %d %d bits\n",
3363                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3364         }
3365     }
3366
3367     //not beautiful here but we must write it before flushing so it has to be here
3368     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3369         ff_msmpeg4_encode_ext_header(s);
3370
3371     write_slice_end(s);
3372
3373     /* Send the last GOB if RTP */
3374     if (s->avctx->rtp_callback) {
3375         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3376         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3377         /* Call the RTP callback to send the last GOB */
3378         emms_c();
3379         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3380     }
3381
3382     return 0;
3383 }
3384
3385 #define MERGE(field) dst->field += src->field; src->field=0
3386 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3387     MERGE(me.scene_change_score);
3388     MERGE(me.mc_mb_var_sum_temp);
3389     MERGE(me.mb_var_sum_temp);
3390 }
3391
3392 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3393     int i;
3394
3395     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3396     MERGE(dct_count[1]);
3397     MERGE(mv_bits);
3398     MERGE(i_tex_bits);
3399     MERGE(p_tex_bits);
3400     MERGE(i_count);
3401     MERGE(f_count);
3402     MERGE(b_count);
3403     MERGE(skip_count);
3404     MERGE(misc_bits);
3405     MERGE(er.error_count);
3406     MERGE(padding_bug_score);
3407     MERGE(current_picture.error[0]);
3408     MERGE(current_picture.error[1]);
3409     MERGE(current_picture.error[2]);
3410
3411     if(dst->avctx->noise_reduction){
3412         for(i=0; i<64; i++){
3413             MERGE(dct_error_sum[0][i]);
3414             MERGE(dct_error_sum[1][i]);
3415         }
3416     }
3417
3418     assert(put_bits_count(&src->pb) % 8 ==0);
3419     assert(put_bits_count(&dst->pb) % 8 ==0);
3420     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3421     flush_put_bits(&dst->pb);
3422 }
3423
3424 static int estimate_qp(MpegEncContext *s, int dry_run){
3425     if (s->next_lambda){
3426         s->current_picture_ptr->f->quality =
3427         s->current_picture.f->quality = s->next_lambda;
3428         if(!dry_run) s->next_lambda= 0;
3429     } else if (!s->fixed_qscale) {
3430         s->current_picture_ptr->f->quality =
3431         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3432         if (s->current_picture.f->quality < 0)
3433             return -1;
3434     }
3435
3436     if(s->adaptive_quant){
3437         switch(s->codec_id){
3438         case AV_CODEC_ID_MPEG4:
3439             if (CONFIG_MPEG4_ENCODER)
3440                 ff_clean_mpeg4_qscales(s);
3441             break;
3442         case AV_CODEC_ID_H263:
3443         case AV_CODEC_ID_H263P:
3444         case AV_CODEC_ID_FLV1:
3445             if (CONFIG_H263_ENCODER)
3446                 ff_clean_h263_qscales(s);
3447             break;
3448         default:
3449             ff_init_qscale_tab(s);
3450         }
3451
3452         s->lambda= s->lambda_table[0];
3453         //FIXME broken
3454     }else
3455         s->lambda = s->current_picture.f->quality;
3456     update_qscale(s);
3457     return 0;
3458 }
3459
3460 /* must be called before writing the header */
3461 static void set_frame_distances(MpegEncContext * s){
3462     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3463     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3464
3465     if(s->pict_type==AV_PICTURE_TYPE_B){
3466         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3467         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3468     }else{
3469         s->pp_time= s->time - s->last_non_b_time;
3470         s->last_non_b_time= s->time;
3471         assert(s->picture_number==0 || s->pp_time > 0);
3472     }
3473 }
3474
/**
 * Encode one picture.
 *
 * Sets up timing and rounding, picks a starting lambda, runs motion
 * estimation (or, for I-frames without fixed qscale, mb_var_thread) over all
 * slice contexts, optionally re-types a P-frame as I on a scene change,
 * chooses f_code/b_code, runs estimate_qp(), prepares the MJPEG/AMV
 * quantization matrices, writes the format-specific picture header and
 * finally runs encode_thread over all slice contexts and merges the results
 * back into s.
 *
 * @param s              main encoder context
 * @param picture_number stored in s->picture_number and passed to the
 *                       picture header writers
 * @return 0 on success; a negative value if estimate_qp(),
 *         ff_update_duplicate_context() or ff_init_me() fails
 */
3475 static int encode_picture(MpegEncContext *s, int picture_number)
3476 {
3477     int i, ret;
3478     int bits;
3479     int context_count = s->slice_context_count;
3480
3481     s->picture_number = picture_number;
3482
3483     /* Reset the average MB variance */
3484     s->me.mb_var_sum_temp    =
3485     s->me.mc_mb_var_sum_temp = 0;
3486
3487     /* we need to initialize some time vars before we can encode b-frames */
3488     // RAL: Condition added for MPEG1VIDEO
3489     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3490         set_frame_distances(s);
3491     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3492         ff_set_mpeg4_time(s);
3493
3494     s->me.scene_change_score=0;
3495
3496 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3497
         /* I-frames reset no_rounding (1 for msmpeg4 version >= 3, else 0);
          * other non-B frames toggle it when flipflop rounding is enabled or
          * the codec is H.263+ or MPEG-4 */
3498     if(s->pict_type==AV_PICTURE_TYPE_I){
3499         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3500         else                        s->no_rounding=0;
3501     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3502         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3503             s->no_rounding ^= 1;
3504     }
3505
         /* second pass: dry-run estimate_qp() before ff_get_2pass_fcode();
          * otherwise, unless CODEC_FLAG_QSCALE is set, start from the lambda
          * remembered for this picture type (B) or the last non-B type */
3506     if(s->flags & CODEC_FLAG_PASS2){
3507         if (estimate_qp(s,1) < 0)
3508             return -1;
3509         ff_get_2pass_fcode(s);
3510     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3511         if(s->pict_type==AV_PICTURE_TYPE_B)
3512             s->lambda= s->last_lambda_for[s->pict_type];
3513         else
3514             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3515         update_qscale(s);
3516     }
3517
         /* every codec except AMV and MJPEG uses the luma intra quantizer
          * matrices for chroma too; a distinct chroma matrix is freed first */
3518     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3519         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3520         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3521         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3522         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3523     }
3524
3525     s->mb_intra=0; //for the rate distortion & bit compare functions
3526     for(i=1; i<context_count; i++){
3527         ret = ff_update_duplicate_context(s->thread_context[i], s);
3528         if (ret < 0)
3529             return ret;
3530     }
3531
3532     if(ff_init_me(s)<0)
3533         return -1;
3534
3535     /* Estimate motion for every MB */
3536     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3537         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3538         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3539         if (s->pict_type != AV_PICTURE_TYPE_B) {
3540             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3541                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3542             }
3543         }
3544
3545         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3546     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3547         /* I-Frame */
3548         for(i=0; i<s->mb_stride*s->mb_height; i++)
3549             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3550
3551         if(!s->fixed_qscale){
3552             /* finding spatial complexity for I-frame rate control */
3553             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3554         }
3555     }
         /* sum the per-context ME statistics into s and publish the variance sums */
3556     for(i=1; i<context_count; i++){
3557         merge_context_after_me(s, s->thread_context[i]);
3558     }
3559     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3560     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3561     emms_c();
3562
         /* a P-frame whose scene change score exceeds the threshold is
          * re-typed as an I-frame and every MB is marked intra */
3563     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3564         s->pict_type= AV_PICTURE_TYPE_I;
3565         for(i=0; i<s->mb_stride*s->mb_height; i++)
3566             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3567         if(s->msmpeg4_version >= 3)
3568             s->no_rounding=1;
3569         av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3570                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3571     }
3572
         /* unless umvplus is set: choose f_code (and b_code for B-frames) from
          * the estimated vector tables, then run ff_fix_long_mvs() on each
          * table with the chosen code */
3573     if(!s->umvplus){
3574         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3575             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3576
3577             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3578                 int a,b;
3579                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3580                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3581                 s->f_code= FFMAX3(s->f_code, a, b);
3582             }
3583
3584             ff_fix_long_p_mvs(s);
3585             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3586             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3587                 int j;
3588                 for(i=0; i<2; i++){
3589                     for(j=0; j<2; j++)
3590                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3591                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3592                 }
3593             }
3594         }
3595
3596         if(s->pict_type==AV_PICTURE_TYPE_B){
3597             int a, b;
3598
3599             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3600             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3601             s->f_code = FFMAX(a, b);
3602
3603             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3604             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3605             s->b_code = FFMAX(a, b);
3606
3607             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3608             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3609             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3610             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3611             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3612                 int dir, j;
3613                 for(dir=0; dir<2; dir++){
3614                     for(i=0; i<2; i++){
3615                         for(j=0; j<2; j++){
3616                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3617                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3618                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3619                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3620                         }
3621                     }
3622                 }
3623             }
3624         }
3625     }
3626
         /* final (non dry-run) quantizer decision for this picture */
3627     if (estimate_qp(s, 0) < 0)
3628         return -1;
3629
3630     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3631         s->qscale= 3; //reduce clipping problems
3632
3633     if (s->out_format == FMT_MJPEG) {
3634         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3635         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3636
             /* a user intra_matrix replaces both defaults; a user
              * chroma_intra_matrix then overrides the chroma one only */
3637         if (s->avctx->intra_matrix) {
3638             chroma_matrix =
3639             luma_matrix = s->avctx->intra_matrix;
3640         }
3641         if (s->avctx->chroma_intra_matrix)
3642             chroma_matrix = s->avctx->chroma_intra_matrix;
3643
3644         /* for mjpeg, we do include qscale in the matrix */
3645         for(i=1;i<64;i++){
3646             int j = s->idsp.idct_permutation[i];
3647
3648             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3649             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3650         }
3651         s->y_dc_scale_table=
3652         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3653         s->chroma_intra_matrix[0] =
3654         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3655         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3656                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3657         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3658                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* the matrices above already contain qscale/8, so qscale itself
              * is pinned to 8 from here on */
3659         s->qscale= 8;
3660     }
3661     if(s->codec_id == AV_CODEC_ID_AMV){
             /* AMV uses constant DC scale tables (13 for luma, 14 for chroma)
              * and intra matrices taken from sp5x_quant_table */
3662         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3663         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3664         for(i=1;i<64;i++){
3665             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3666
3667             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3668             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3669         }
3670         s->y_dc_scale_table= y;
3671         s->c_dc_scale_table= c;
3672         s->intra_matrix[0] = 13;
3673         s->chroma_intra_matrix[0] = 14;
3674         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3675                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3676         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3677                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3678         s->qscale= 8;
3679     }
3680
3681     //FIXME var duplication
3682     s->current_picture_ptr->f->key_frame =
3683     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3684     s->current_picture_ptr->f->pict_type =
3685     s->current_picture.f->pict_type = s->pict_type;
3686
3687     if (s->current_picture.f->key_frame)
3688         s->picture_in_gop_number=0;
3689
         /* write the picture header for the output format; header_bits is the
          * number of bits this added to s->pb */
3690     s->mb_x = s->mb_y = 0;
3691     s->last_bits= put_bits_count(&s->pb);
3692     switch(s->out_format) {
3693     case FMT_MJPEG:
3694         if (CONFIG_MJPEG_ENCODER)
3695             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3696                                            s->intra_matrix, s->chroma_intra_matrix);
3697         break;
3698     case FMT_H261:
3699         if (CONFIG_H261_ENCODER)
3700             ff_h261_encode_picture_header(s, picture_number);
3701         break;
3702     case FMT_H263:
3703         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3704             ff_wmv2_encode_picture_header(s, picture_number);
3705         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3706             ff_msmpeg4_encode_picture_header(s, picture_number);
3707         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3708             ff_mpeg4_encode_picture_header(s, picture_number);
3709         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3710             ff_rv10_encode_picture_header(s, picture_number);
3711         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3712             ff_rv20_encode_picture_header(s, picture_number);
3713         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3714             ff_flv_encode_picture_header(s, picture_number);
3715         else if (CONFIG_H263_ENCODER)
3716             ff_h263_encode_picture_header(s, picture_number);
3717         break;
3718     case FMT_MPEG1:
3719         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3720             ff_mpeg1_encode_picture_header(s, picture_number);
3721         break;
3722     default:
3723         av_assert0(0);
3724     }
3725     bits= put_bits_count(&s->pb);
3726     s->header_bits= bits - s->last_bits;
3727
         /* refresh the other slice contexts from s, run encode_thread over all
          * contexts, then merge their counters and bitstreams back into s */
3728     for(i=1; i<context_count; i++){
3729         update_duplicate_context_after_me(s->thread_context[i], s);
3730     }
3731     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3732     for(i=1; i<context_count; i++){
3733         merge_context_after_encode(s, s->thread_context[i]);
3734     }
3735     emms_c();
3736     return 0;
3737 }
3738
3739 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3740     const int intra= s->mb_intra;
3741     int i;
3742
3743     s->dct_count[intra]++;
3744
3745     for(i=0; i<64; i++){
3746         int level= block[i];
3747
3748         if(level){
3749             if(level>0){
3750                 s->dct_error_sum[intra][i] += level;
3751                 level -= s->dct_offset[intra][i];
3752                 if(level<0) level=0;
3753             }else{
3754                 s->dct_error_sum[intra][i] -= level;
3755                 level += s->dct_offset[intra][i];
3756                 if(level>0) level=0;
3757             }
3758             block[i]= level;
3759         }
3760     }
3761 }
3762
3763 static int dct_quantize_trellis_c(MpegEncContext *s,
3764                                   int16_t *block, int n,
3765                                   int qscale, int *overflow){
3766     const int *qmat;
3767     const uint16_t *matrix;
3768     const uint8_t *scantable= s->intra_scantable.scantable;
3769     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3770     int max=0;
3771     unsigned int threshold1, threshold2;
3772     int bias=0;
3773     int run_tab[65];
3774     int level_tab[65];
3775     int score_tab[65];
3776     int survivor[65];
3777     int survivor_count;
3778     int last_run=0;
3779     int last_level=0;
3780     int last_score= 0;
3781     int last_i;
3782     int coeff[2][64];
3783     int coeff_count[64];
3784     int qmul, qadd, start_i, last_non_zero, i, dc;
3785     const int esc_length= s->ac_esc_length;
3786     uint8_t * length;
3787     uint8_t * last_length;
3788     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3789
3790     s->fdsp.fdct(block);
3791
3792     if(s->dct_error_sum)
3793         s->denoise_dct(s, block);
3794     qmul= qscale*16;
3795     qadd= ((qscale-1)|1)*8;
3796
3797     if (s->mb_intra) {
3798         int q;
3799         if (!s->h263_aic) {
3800             if (n < 4)
3801                 q = s->y_dc_scale;
3802             else
3803                 q = s->c_dc_scale;
3804             q = q << 3;
3805         } else{
3806             /* For AIC we skip quant/dequant of INTRADC */
3807             q = 1 << 3;
3808             qadd=0;
3809         }
3810
3811         /* note: block[0] is assumed to be positive */
3812         block[0] = (block[0] + (q >> 1)) / q;
3813         start_i = 1;
3814         last_non_zero = 0;
3815         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3816         matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3817         if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3818             bias= 1<<(QMAT_SHIFT-1);
3819
3820         if (n > 3 && s->intra_chroma_ac_vlc_length) {
3821             length     = s->intra_chroma_ac_vlc_length;
3822             last_length= s->intra_chroma_ac_vlc_last_length;
3823         } else {
3824             length     = s->intra_ac_vlc_length;
3825             last_length= s->intra_ac_vlc_last_length;
3826         }
3827     } else {
3828         start_i = 0;
3829         last_non_zero = -1;
3830         qmat = s->q_inter_matrix[qscale];
3831         matrix = s->inter_matrix;
3832         length     = s->inter_ac_vlc_length;
3833         last_length= s->inter_ac_vlc_last_length;
3834     }
3835     last_i= start_i;
3836
3837     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3838     threshold2= (threshold1<<1);
3839
3840     for(i=63; i>=start_i; i--) {
3841         const int j = scantable[i];
3842         int level = block[j] * qmat[j];
3843
3844         if(((unsigned)(level+threshold1))>threshold2){
3845             last_non_zero = i;
3846             break;
3847         }
3848     }
3849
3850     for(i=start_i; i<=last_non_zero; i++) {
3851         const int j = scantable[i];
3852         int level = block[j] * qmat[j];
3853
3854 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3855 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3856         if(((unsigned)(level+threshold1))>threshold2){
3857             if(level>0){
3858                 level= (bias + level)>>QMAT_SHIFT;
3859                 coeff[0][i]= level;
3860                 coeff[1][i]= level-1;
3861 //                coeff[2][k]= level-2;
3862             }else{
3863                 level= (bias - level)>>QMAT_SHIFT;
3864                 coeff[0][i]= -level;
3865                 coeff[1][i]= -level+1;
3866 //                coeff[2][k]= -level+2;
3867             }
3868             coeff_count[i]= FFMIN(level, 2);
3869             av_assert2(coeff_count[i]);
3870             max |=level;
3871         }else{
3872             coeff[0][i]= (level>>31)|1;
3873             coeff_count[i]= 1;
3874         }
3875     }
3876
3877     *overflow= s->max_qcoeff < max; //overflow might have happened
3878
3879     if(last_non_zero < start_i){
3880         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3881         return last_non_zero;
3882     }
3883
3884     score_tab[start_i]= 0;
3885     survivor[0]= start_i;
3886     survivor_count= 1;
3887
3888     for(i=start_i; i<=last_non_zero; i++){
3889         int level_index, j, zero_distortion;
3890         int dct_coeff= FFABS(block[ scantable[i] ]);
3891         int best_score=256*256*256*120;
3892
3893         if (s->fdsp.fdct == ff_fdct_ifast)
3894             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3895         zero_distortion= dct_coeff*dct_coeff;
3896
3897         for(level_index=0; level_index < coeff_count[i]; level_index++){
3898             int distortion;
3899             int level= coeff[level_index][i];
3900             const int alevel= FFABS(level);
3901             int unquant_coeff;
3902
3903             av_assert2(level);
3904
3905             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3906                 unquant_coeff= alevel*qmul + qadd;
3907             } else if(s->out_format == FMT_MJPEG) {
3908                 j = s->idsp.idct_permutation[scantable[i]];
3909                 unquant_coeff = alevel * matrix[j] * 8;
3910             }else{ //MPEG1
3911                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3912                 if(s->mb_intra){
3913                         unquant_coeff = (int)(  alevel  * qscale * matrix[j]) >> 3;
3914                         unquant_coeff =   (unquant_coeff - 1) | 1;
3915                 }else{
3916                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[j])) >> 4;
3917                         unquant_coeff =   (unquant_coeff - 1) | 1;
3918                 }
3919                 unquant_coeff<<= 3;
3920             }
3921
3922             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3923             level+=64;
3924             if((level&(~127)) == 0){
3925                 for(j=survivor_count-1; j>=0; j--){
3926                     int run= i - survivor[j];
3927                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3928                     score += score_tab[i-run];
3929
3930                     if(score < best_score){
3931                         best_score= score;
3932                         run_tab[i+1]= run;
3933                         level_tab[i+1]= level-64;
3934                     }
3935                 }
3936
3937                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3938                     for(j=survivor_count-1; j>=0; j--){
3939                         int run= i - survivor[j];
3940                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3941                         score += score_tab[i-run];
3942                         if(score < last_score){
3943                             last_score= score;
3944                             last_run= run;
3945                             last_level= level-64;
3946                             last_i= i+1;
3947                         }
3948                     }
3949                 }
3950             }else{
3951                 distortion += esc_length*lambda;
3952                 for(j=survivor_count-1; j>=0; j--){
3953                     int run= i - survivor[j];
3954                     int score= distortion + score_tab[i-run];
3955
3956                     if(score < best_score){
3957                         best_score= score;
3958                         run_tab[i+1]= run;
3959                         level_tab[i+1]= level-64;
3960                     }
3961                 }
3962
3963                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3964                   for(j=survivor_count-1; j>=0; j--){
3965                         int run= i - survivor[j];
3966                         int score= distortion + score_tab[i-run];
3967                         if(score < last_score){
3968                             last_score= score;
3969                             last_run= run;
3970                             last_level= level-64;
3971                             last_i= i+1;
3972                         }
3973                     }
3974                 }
3975             }
3976         }
3977
3978         score_tab[i+1]= best_score;
3979
3980         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3981         if(last_non_zero <= 27){
3982             for(; survivor_count; survivor_count--){
3983                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3984                     break;
3985             }
3986         }else{
3987             for(; survivor_count; survivor_count--){
3988                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3989                     break;
3990             }
3991         }
3992
3993         survivor[ survivor_count++ ]= i+1;
3994     }
3995
3996     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3997         last_score= 256*256*256*120;
3998         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3999             int score= score_tab[i];
4000             if(i) score += lambda*2; //FIXME exacter?
4001
4002             if(score < last_score){
4003                 last_score= score;
4004                 last_i= i;
4005                 last_level= level_tab[i];
4006                 last_run= run_tab[i];
4007             }
4008         }
4009     }
4010
4011     s->coded_score[n] = last_score;
4012
4013     dc= FFABS(block[0]);
4014     last_non_zero= last_i - 1;
4015     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4016
4017     if(last_non_zero < start_i)
4018         return last_non_zero;
4019
4020     if(last_non_zero == 0 && start_i == 0){
4021         int best_level= 0;
4022         int best_score= dc * dc;
4023
4024         for(i=0; i<coeff_count[0]; i++){
4025             int level= coeff[i][0];
4026             int alevel= FFABS(level);
4027             int unquant_coeff, score, distortion;
4028
4029             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4030                     unquant_coeff= (alevel*qmul + qadd)>>3;
4031             }else{ //MPEG1
4032                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[0])) >> 4;
4033                     unquant_coeff =   (unquant_coeff - 1) | 1;
4034             }
4035             unquant_coeff = (unquant_coeff + 4) >> 3;
4036             unquant_coeff<<= 3 + 3;
4037
4038             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4039             level+=64;
4040             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4041             else                    score= distortion + esc_length*lambda;
4042
4043             if(score < best_score){
4044                 best_score= score;
4045                 best_level= level - 64;
4046             }
4047         }
4048         block[0]= best_level;
4049         s->coded_score[n] = best_score - dc*dc;
4050         if(best_level == 0) return -1;
4051         else                return last_non_zero;
4052     }
4053
4054     i= last_i;
4055     av_assert2(last_level);
4056
4057     block[ perm_scantable[last_non_zero] ]= last_level;
4058     i -= last_run + 1;
4059
4060     for(; i>start_i; i -= run_tab[i] + 1){
4061         block[ perm_scantable[i-1] ]= level_tab[i];
4062     }
4063
4064     return last_non_zero;
4065 }
4066
4067 //#define REFINE_STATS 1
4068 static int16_t basis[64][64];
4069
4070 static void build_basis(uint8_t *perm){
4071     int i, j, x, y;
4072     emms_c();
4073     for(i=0; i<8; i++){
4074         for(j=0; j<8; j++){
4075             for(y=0; y<8; y++){
4076                 for(x=0; x<8; x++){
4077                     double s= 0.25*(1<<BASIS_SHIFT);
4078                     int index= 8*i + j;
4079                     int perm_index= perm[index];
4080                     if(i==0) s*= sqrt(0.5);
4081                     if(j==0) s*= sqrt(0.5);
4082                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4083                 }
4084             }
4085         }
4086     }
4087 }
4088
4089 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4090                         int16_t *block, int16_t *weight, int16_t *orig,
4091                         int n, int qscale){
4092     int16_t rem[64];
4093     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4094     const uint8_t *scantable= s->intra_scantable.scantable;
4095     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4096 //    unsigned int threshold1, threshold2;
4097 //    int bias=0;
4098     int run_tab[65];
4099     int prev_run=0;
4100     int prev_level=0;
4101     int qmul, qadd, start_i, last_non_zero, i, dc;
4102     uint8_t * length;
4103     uint8_t * last_length;
4104     int lambda;
4105     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4106 #ifdef REFINE_STATS
4107 static int count=0;
4108 static int after_last=0;
4109 static int to_zero=0;
4110 static int from_zero=0;
4111 static int raise=0;
4112 static int lower=0;
4113 static int messed_sign=0;
4114 #endif
4115
4116     if(basis[0][0] == 0)
4117         build_basis(s->idsp.idct_permutation);
4118
4119     qmul= qscale*2;
4120     qadd= (qscale-1)|1;
4121     if (s->mb_intra) {
4122         if (!s->h263_aic) {
4123             if (n < 4)
4124                 q = s->y_dc_scale;
4125             else
4126                 q = s->c_dc_scale;
4127         } else{
4128             /* For AIC we skip quant/dequant of INTRADC */
4129             q = 1;
4130             qadd=0;
4131         }
4132         q <<= RECON_SHIFT-3;
4133         /* note: block[0] is assumed to be positive */
4134         dc= block[0]*q;
4135 //        block[0] = (block[0] + (q >> 1)) / q;
4136         start_i = 1;
4137 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4138 //            bias= 1<<(QMAT_SHIFT-1);
4139         if (n > 3 && s->intra_chroma_ac_vlc_length) {
4140             length     = s->intra_chroma_ac_vlc_length;
4141             last_length= s->intra_chroma_ac_vlc_last_length;
4142         } else {
4143             length     = s->intra_ac_vlc_length;
4144             last_length= s->intra_ac_vlc_last_length;
4145         }
4146     } else {
4147         dc= 0;
4148         start_i = 0;
4149         length     = s->inter_ac_vlc_length;
4150         last_length= s->inter_ac_vlc_last_length;
4151     }
4152     last_non_zero = s->block_last_index[n];
4153
4154 #ifdef REFINE_STATS
4155 {START_TIMER
4156 #endif
4157     dc += (1<<(RECON_SHIFT-1));
4158     for(i=0; i<64; i++){
4159         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4160     }
4161 #ifdef REFINE_STATS
4162 STOP_TIMER("memset rem[]")}
4163 #endif
4164     sum=0;
4165     for(i=0; i<64; i++){
4166         int one= 36;
4167         int qns=4;
4168         int w;
4169
4170         w= FFABS(weight[i]) + qns*one;
4171         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4172
4173         weight[i] = w;
4174 //        w=weight[i] = (63*qns + (w/2)) / w;
4175
4176         av_assert2(w>0);
4177         av_assert2(w<(1<<6));
4178         sum += w*w;
4179     }
4180     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4181 #ifdef REFINE_STATS
4182 {START_TIMER
4183 #endif
4184     run=0;
4185     rle_index=0;
4186     for(i=start_i; i<=last_non_zero; i++){
4187         int j= perm_scantable[i];
4188         const int level= block[j];
4189         int coeff;
4190
4191         if(level){
4192             if(level<0) coeff= qmul*level - qadd;
4193             else        coeff= qmul*level + qadd;
4194             run_tab[rle_index++]=run;
4195             run=0;
4196
4197             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4198         }else{
4199             run++;
4200         }
4201     }
4202 #ifdef REFINE_STATS
4203 if(last_non_zero>0){
4204 STOP_TIMER("init rem[]")
4205 }
4206 }
4207
4208 {START_TIMER
4209 #endif
4210     for(;;){
4211         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4212         int best_coeff=0;
4213         int best_change=0;
4214         int run2, best_unquant_change=0, analyze_gradient;
4215 #ifdef REFINE_STATS
4216 {START_TIMER
4217 #endif
4218         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4219
4220         if(analyze_gradient){
4221 #ifdef REFINE_STATS
4222 {START_TIMER
4223 #endif
4224             for(i=0; i<64; i++){
4225                 int w= weight[i];
4226
4227                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4228             }
4229 #ifdef REFINE_STATS
4230 STOP_TIMER("rem*w*w")}
4231 {START_TIMER
4232 #endif
4233             s->fdsp.fdct(d1);
4234 #ifdef REFINE_STATS
4235 STOP_TIMER("dct")}
4236 #endif
4237         }
4238
4239         if(start_i){
4240             const int level= block[0];
4241             int change, old_coeff;
4242
4243             av_assert2(s->mb_intra);
4244
4245             old_coeff= q*level;
4246
4247             for(change=-1; change<=1; change+=2){
4248                 int new_level= level + change;
4249                 int score, new_coeff;
4250
4251                 new_coeff= q*new_level;
4252                 if(new_coeff >= 2048 || new_coeff < 0)
4253                     continue;
4254
4255                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4256                                                   new_coeff - old_coeff);
4257                 if(score<best_score){
4258                     best_score= score;
4259                     best_coeff= 0;
4260                     best_change= change;
4261                     best_unquant_change= new_coeff - old_coeff;
4262                 }
4263             }
4264         }
4265
4266         run=0;
4267         rle_index=0;
4268         run2= run_tab[rle_index++];
4269         prev_level=0;
4270         prev_run=0;
4271
4272         for(i=start_i; i<64; i++){
4273             int j= perm_scantable[i];
4274             const int level= block[j];
4275             int change, old_coeff;
4276
4277             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4278                 break;
4279
4280             if(level){
4281                 if(level<0) old_coeff= qmul*level - qadd;
4282                 else        old_coeff= qmul*level + qadd;
4283                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4284             }else{
4285                 old_coeff=0;
4286                 run2--;
4287                 av_assert2(run2>=0 || i >= last_non_zero );
4288             }
4289
4290             for(change=-1; change<=1; change+=2){
4291                 int new_level= level + change;
4292                 int score, new_coeff, unquant_change;
4293
4294                 score=0;
4295                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4296                    continue;
4297
4298                 if(new_level){
4299                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4300                     else            new_coeff= qmul*new_level + qadd;
4301                     if(new_coeff >= 2048 || new_coeff <= -2048)
4302                         continue;
4303                     //FIXME check for overflow
4304
4305                     if(level){
4306                         if(level < 63 && level > -63){
4307                             if(i < last_non_zero)
4308                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4309                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4310                             else
4311                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4312                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4313                         }
4314                     }else{
4315                         av_assert2(FFABS(new_level)==1);
4316
4317                         if(analyze_gradient){
4318                             int g= d1[ scantable[i] ];
4319                             if(g && (g^new_level) >= 0)
4320                                 continue;
4321                         }
4322
4323                         if(i < last_non_zero){
4324                             int next_i= i + run2 + 1;
4325                             int next_level= block[ perm_scantable[next_i] ] + 64;
4326
4327                             if(next_level&(~127))
4328                                 next_level= 0;
4329
4330                             if(next_i < last_non_zero)
4331                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4332                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4333                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4334                             else
4335                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4336                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4337                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4338                         }else{
4339                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4340                             if(prev_level){
4341                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4342                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4343                             }
4344                         }
4345                     }
4346                 }else{
4347                     new_coeff=0;
4348                     av_assert2(FFABS(level)==1);
4349
4350                     if(i < last_non_zero){
4351                         int next_i= i + run2 + 1;
4352                         int next_level= block[ perm_scantable[next_i] ] + 64;
4353
4354                         if(next_level&(~127))
4355                             next_level= 0;
4356
4357                         if(next_i < last_non_zero)
4358                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4359                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4360                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4361                         else
4362                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4363                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4364                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4365                     }else{
4366                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4367                         if(prev_level){
4368                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4369                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4370                         }
4371                     }
4372                 }
4373
4374                 score *= lambda;
4375
4376                 unquant_change= new_coeff - old_coeff;
4377                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4378
4379                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4380                                                    unquant_change);
4381                 if(score<best_score){
4382                     best_score= score;
4383                     best_coeff= i;
4384                     best_change= change;
4385                     best_unquant_change= unquant_change;
4386                 }
4387             }
4388             if(level){
4389                 prev_level= level + 64;
4390                 if(prev_level&(~127))
4391                     prev_level= 0;
4392                 prev_run= run;
4393                 run=0;
4394             }else{
4395                 run++;
4396             }
4397         }
4398 #ifdef REFINE_STATS
4399 STOP_TIMER("iterative step")}
4400 #endif
4401
4402         if(best_change){
4403             int j= perm_scantable[ best_coeff ];
4404
4405             block[j] += best_change;
4406
4407             if(best_coeff > last_non_zero){
4408                 last_non_zero= best_coeff;
4409                 av_assert2(block[j]);
4410 #ifdef REFINE_STATS
4411 after_last++;
4412 #endif
4413             }else{
4414 #ifdef REFINE_STATS
4415 if(block[j]){
4416     if(block[j] - best_change){
4417         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4418             raise++;
4419         }else{
4420             lower++;
4421         }
4422     }else{
4423         from_zero++;
4424     }
4425 }else{
4426     to_zero++;
4427 }
4428 #endif
4429                 for(; last_non_zero>=start_i; last_non_zero--){
4430                     if(block[perm_scantable[last_non_zero]])
4431                         break;
4432                 }
4433             }
4434 #ifdef REFINE_STATS
4435 count++;
4436 if(256*256*256*64 % count == 0){
4437     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4438 }
4439 #endif
4440             run=0;
4441             rle_index=0;
4442             for(i=start_i; i<=last_non_zero; i++){
4443                 int j= perm_scantable[i];
4444                 const int level= block[j];
4445
4446                  if(level){
4447                      run_tab[rle_index++]=run;
4448                      run=0;
4449                  }else{
4450                      run++;
4451                  }
4452             }
4453
4454             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4455         }else{
4456             break;
4457         }
4458     }
4459 #ifdef REFINE_STATS
4460 if(last_non_zero>0){
4461 STOP_TIMER("iterative search")
4462 }
4463 }
4464 #endif
4465
4466     return last_non_zero;
4467 }
4468
4469 int ff_dct_quantize_c(MpegEncContext *s,
4470                         int16_t *block, int n,
4471                         int qscale, int *overflow)
4472 {
4473     int i, j, level, last_non_zero, q, start_i;
4474     const int *qmat;
4475     const uint8_t *scantable= s->intra_scantable.scantable;
4476     int bias;
4477     int max=0;
4478     unsigned int threshold1, threshold2;
4479
4480     s->fdsp.fdct(block);
4481
4482     if(s->dct_error_sum)
4483         s->denoise_dct(s, block);
4484
4485     if (s->mb_intra) {
4486         if (!s->h263_aic) {
4487             if (n < 4)
4488                 q = s->y_dc_scale;
4489             else
4490                 q = s->c_dc_scale;
4491             q = q << 3;
4492         } else
4493             /* For AIC we skip quant/dequant of INTRADC */
4494             q = 1 << 3;
4495
4496         /* note: block[0] is assumed to be positive */
4497         block[0] = (block[0] + (q >> 1)) / q;
4498         start_i = 1;
4499         last_non_zero = 0;
4500         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4501         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4502     } else {
4503         start_i = 0;
4504         last_non_zero = -1;
4505         qmat = s->q_inter_matrix[qscale];
4506         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4507     }
4508     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4509     threshold2= (threshold1<<1);
4510     for(i=63;i>=start_i;i--) {
4511         j = scantable[i];
4512         level = block[j] * qmat[j];
4513
4514         if(((unsigned)(level+threshold1))>threshold2){
4515             last_non_zero = i;
4516             break;
4517         }else{
4518             block[j]=0;
4519         }
4520     }
4521     for(i=start_i; i<=last_non_zero; i++) {
4522         j = scantable[i];
4523         level = block[j] * qmat[j];
4524
4525 //        if(   bias+level >= (1<<QMAT_SHIFT)
4526 //           || bias-level >= (1<<QMAT_SHIFT)){
4527         if(((unsigned)(level+threshold1))>threshold2){
4528             if(level>0){
4529                 level= (bias + level)>>QMAT_SHIFT;
4530                 block[j]= level;
4531             }else{
4532                 level= (bias - level)>>QMAT_SHIFT;
4533                 block[j]= -level;
4534             }
4535             max |=level;
4536         }else{
4537             block[j]=0;
4538         }
4539     }
4540     *overflow= s->max_qcoeff < max; //overflow might have happened
4541
4542     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4543     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4544         ff_block_permute(block, s->idsp.idct_permutation,
4545                          scantable, last_non_zero);
4546
4547     return last_non_zero;
4548 }
4549
/* Byte offset of member x inside MpegEncContext, for the option tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flags shared by the options below. The replacement list is parenthesized
 * so that VE stays one operand wherever it is used in an expression. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4552 static const AVOption h263_options[] = {
4553     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4554     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4555     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4556     FF_MPV_COMMON_OPTS
4557     { NULL },
4558 };
4559
4560 static const AVClass h263_class = {
4561     .class_name = "H.263 encoder",
4562     .item_name  = av_default_item_name,
4563     .option     = h263_options,
4564     .version    = LIBAVUTIL_VERSION_INT,
4565 };
4566
4567 AVCodec ff_h263_encoder = {
4568     .name           = "h263",
4569     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4570     .type           = AVMEDIA_TYPE_VIDEO,
4571     .id             = AV_CODEC_ID_H263,
4572     .priv_data_size = sizeof(MpegEncContext),
4573     .init           = ff_mpv_encode_init,
4574     .encode2        = ff_mpv_encode_picture,
4575     .close          = ff_mpv_encode_end,
4576     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4577     .priv_class     = &h263_class,
4578 };
4579
4580 static const AVOption h263p_options[] = {
4581     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4582     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4583     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4584     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4585     FF_MPV_COMMON_OPTS
4586     { NULL },
4587 };
4588 static const AVClass h263p_class = {
4589     .class_name = "H.263p encoder",
4590     .item_name  = av_default_item_name,
4591     .option     = h263p_options,
4592     .version    = LIBAVUTIL_VERSION_INT,
4593 };
4594
4595 AVCodec ff_h263p_encoder = {
4596     .name           = "h263p",
4597     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4598     .type           = AVMEDIA_TYPE_VIDEO,
4599     .id             = AV_CODEC_ID_H263P,
4600     .priv_data_size = sizeof(MpegEncContext),
4601     .init           = ff_mpv_encode_init,
4602     .encode2        = ff_mpv_encode_picture,
4603     .close          = ff_mpv_encode_end,
4604     .capabilities   = CODEC_CAP_SLICE_THREADS,
4605     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4606     .priv_class     = &h263p_class,
4607 };
4608
4609 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4610
4611 AVCodec ff_msmpeg4v2_encoder = {
4612     .name           = "msmpeg4v2",
4613     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4614     .type           = AVMEDIA_TYPE_VIDEO,
4615     .id             = AV_CODEC_ID_MSMPEG4V2,
4616     .priv_data_size = sizeof(MpegEncContext),
4617     .init           = ff_mpv_encode_init,
4618     .encode2        = ff_mpv_encode_picture,
4619     .close          = ff_mpv_encode_end,
4620     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4621     .priv_class     = &msmpeg4v2_class,
4622 };
4623
4624 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4625
4626 AVCodec ff_msmpeg4v3_encoder = {
4627     .name           = "msmpeg4",
4628     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4629     .type           = AVMEDIA_TYPE_VIDEO,
4630     .id             = AV_CODEC_ID_MSMPEG4V3,
4631     .priv_data_size = sizeof(MpegEncContext),
4632     .init           = ff_mpv_encode_init,
4633     .encode2        = ff_mpv_encode_picture,
4634     .close          = ff_mpv_encode_end,
4635     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4636     .priv_class     = &msmpeg4v3_class,
4637 };
4638
4639 FF_MPV_GENERIC_CLASS(wmv1)
4640
4641 AVCodec ff_wmv1_encoder = {
4642     .name           = "wmv1",
4643     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4644     .type           = AVMEDIA_TYPE_VIDEO,
4645     .id             = AV_CODEC_ID_WMV1,
4646     .priv_data_size = sizeof(MpegEncContext),
4647     .init           = ff_mpv_encode_init,
4648     .encode2        = ff_mpv_encode_picture,
4649     .close          = ff_mpv_encode_end,
4650     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4651     .priv_class     = &wmv1_class,
4652 };