libavcodec/mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "pixblockdsp.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60 #include "sp5x.h"
61
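/* The qmat[]/qmat16[] tables built by ff_convert_matrix() below store
 * fixed-point reciprocals of qscale * quant_matrix[], scaled by
 * 2^QMAT_SHIFT (or 2^QMAT_SHIFT_MMX for the 16-bit tables), so that
 * quantization becomes a multiply and shift instead of a per-coefficient
 * division.  QUANT_BIAS_SHIFT is the fixed-point precision of the rounding
 * bias (a bias of 1 << QUANT_BIAS_SHIFT corresponds to 1.0). */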
62 #define QUANT_BIAS_SHIFT 8
63
64 #define QMAT_SHIFT_MMX 16
65 #define QMAT_SHIFT 21
66
67 static int encode_picture(MpegEncContext *s, int picture_number);
68 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
69 static int sse_mb(MpegEncContext *s);
70 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
71 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
72
73 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
74 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
75
76 const AVOption ff_mpv_generic_options[] = {
77     FF_MPV_COMMON_OPTS
78     { NULL },
79 };
80
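/**
 * Build the per-qscale quantization multiplier tables for the FDCT in use.
 * The ifast FDCT needs the ff_aanscales[] post-scaling factors folded into
 * the divisor; the 16-bit qmat16 tables (used by the 16-bit/SIMD quantizer
 * paths) additionally get a per-coefficient rounding-bias table.  A warning
 * is printed if the chosen precision could overflow for some coefficient.
 */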
81 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
82                        uint16_t (*qmat16)[2][64],
83                        const uint16_t *quant_matrix,
84                        int bias, int qmin, int qmax, int intra)
85 {
86     FDCTDSPContext *fdsp = &s->fdsp;
87     int qscale;
88     int shift = 0;
89
90     for (qscale = qmin; qscale <= qmax; qscale++) {
91         int i;
92         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
93 #if CONFIG_FAANDCT
94             fdsp->fdct == ff_faandct            ||
95 #endif /* CONFIG_FAANDCT */
96             fdsp->fdct == ff_jpeg_fdct_islow_10) {
97             for (i = 0; i < 64; i++) {
98                 const int j = s->idsp.idct_permutation[i];
99                 int64_t den = (int64_t) qscale * quant_matrix[j];
100                 /* 16 <= qscale * quant_matrix[i] <= 7905
101                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
102                  *             19952 <=              x  <= 249205026
103                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
104                  *           3444240 >= (1 << 36) / (x) >= 275 */
105
106                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
107             }
108         } else if (fdsp->fdct == ff_fdct_ifast) {
109             for (i = 0; i < 64; i++) {
110                 const int j = s->idsp.idct_permutation[i];
111                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
112                 /* 16 <= qscale * quant_matrix[i] <= 7905
113                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
114                  *             19952 <=              x  <= 249205026
115                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
116                  *           3444240 >= (1 << 36) / (x) >= 275 */
117
118                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
119             }
120         } else {
121             for (i = 0; i < 64; i++) {
122                 const int j = s->idsp.idct_permutation[i];
123                 int64_t den = (int64_t) qscale * quant_matrix[j];
124                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
125                  * Assume x = qscale * quant_matrix[i]
126                  * So             16 <=              x  <= 7905
127                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
128                  * so          32768 >= (1 << 19) / (x) >= 67 */
129                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
130                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
131                 //                    (qscale * quant_matrix[i]);
132                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
133
134                 if (qmat16[qscale][0][i] == 0 ||
135                     qmat16[qscale][0][i] == 128 * 256)
136                     qmat16[qscale][0][i] = 128 * 256 - 1;
137                 qmat16[qscale][1][i] =
138                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
139                                 qmat16[qscale][0][i]);
140             }
141         }
142
143         for (i = intra; i < 64; i++) {
144             int64_t max = 8191;
145             if (fdsp->fdct == ff_fdct_ifast) {
146                 max = (8191LL * ff_aanscales[i]) >> 14;
147             }
148             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
149                 shift++;
150             }
151         }
152     }
153     if (shift) {
154         av_log(NULL, AV_LOG_INFO,
155                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
156                QMAT_SHIFT - shift);
157     }
158 }
159
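/* Derive the macroblock quantizer from the rate-control lambda.  Since
 * 139 ~= (1 << (FF_LAMBDA_SHIFT + 7)) / FF_QP2LAMBDA, this is essentially
 * qscale = lambda / FF_QP2LAMBDA with rounding, clipped to [qmin, qmax];
 * lambda2 is kept in sync for the rate-distortion decisions. */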
160 static inline void update_qscale(MpegEncContext *s)
161 {
162     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
163                 (FF_LAMBDA_SHIFT + 7);
164     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
165
166     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
167                  FF_LAMBDA_SHIFT;
168 }
169
170 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
171 {
172     int i;
173
174     if (matrix) {
175         put_bits(pb, 1, 1);
176         for (i = 0; i < 64; i++) {
177             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
178         }
179     } else
180         put_bits(pb, 1, 0);
181 }
182
183 /**
184  * init s->current_picture.qscale_table from s->lambda_table
185  */
186 void ff_init_qscale_tab(MpegEncContext *s)
187 {
188     int8_t * const qscale_table = s->current_picture.qscale_table;
189     int i;
190
191     for (i = 0; i < s->mb_num; i++) {
192         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
193         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
194         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
195                                                   s->avctx->qmax);
196     }
197 }
198
199 static void update_duplicate_context_after_me(MpegEncContext *dst,
200                                               MpegEncContext *src)
201 {
202 #define COPY(a) dst->a= src->a
203     COPY(pict_type);
204     COPY(current_picture);
205     COPY(f_code);
206     COPY(b_code);
207     COPY(qscale);
208     COPY(lambda);
209     COPY(lambda2);
210     COPY(picture_in_gop_number);
211     COPY(gop_picture_number);
212     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
213     COPY(progressive_frame);    // FIXME don't set in encode_header
214     COPY(partitioned_frame);    // FIXME don't set in encode_header
215 #undef COPY
216 }
217
218 /**
219  * Set the given MpegEncContext to defaults for encoding.
220  * The changed fields will not depend upon the prior state of the MpegEncContext.
221  */
222 static void mpv_encode_defaults(MpegEncContext *s)
223 {
224     int i;
225     ff_mpv_common_defaults(s);
226
227     for (i = -16; i < 16; i++) {
228         default_fcode_tab[i + MAX_MV] = 1;
229     }
230     s->me.mv_penalty = default_mv_penalty;
231     s->fcode_tab     = default_fcode_tab;
232
233     s->input_picture_number  = 0;
234     s->picture_in_gop_number = 0;
235 }
236
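/* Select the DCT/quantization implementations: arch-specific and generic
 * defaults first, then switch dct_quantize to the trellis version when the
 * trellis option is enabled (the plain one is kept as fast_dct_quantize). */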
237 av_cold int ff_dct_encode_init(MpegEncContext *s) {
238     if (ARCH_X86)
239         ff_dct_encode_init_x86(s);
240
241     if (CONFIG_H263_ENCODER)
242         ff_h263dsp_init(&s->h263dsp);
243     if (!s->dct_quantize)
244         s->dct_quantize = ff_dct_quantize_c;
245     if (!s->denoise_dct)
246         s->denoise_dct  = denoise_dct_c;
247     s->fast_dct_quantize = s->dct_quantize;
248     if (s->avctx->trellis)
249         s->dct_quantize  = dct_quantize_trellis_c;
250
251     return 0;
252 }
253
254 /* init video encoder */
255 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
256 {
257     MpegEncContext *s = avctx->priv_data;
258     int i, ret, format_supported;
259
260     mpv_encode_defaults(s);
261
262     switch (avctx->codec_id) {
263     case AV_CODEC_ID_MPEG2VIDEO:
264         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
265             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
266             av_log(avctx, AV_LOG_ERROR,
267                    "only YUV420 and YUV422 are supported\n");
268             return -1;
269         }
270         break;
271     case AV_CODEC_ID_MJPEG:
272     case AV_CODEC_ID_AMV:
273         format_supported = 0;
274         /* JPEG color space */
275         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
276             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
277             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
278             (avctx->color_range == AVCOL_RANGE_JPEG &&
279              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
280               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
281               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
282             format_supported = 1;
283         /* MPEG color space */
284         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
285                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
286                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
287                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
288             format_supported = 1;
289
290         if (!format_supported) {
291             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
292             return -1;
293         }
294         break;
295     default:
296         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
297             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
298             return -1;
299         }
300     }
301
302     switch (avctx->pix_fmt) {
303     case AV_PIX_FMT_YUVJ444P:
304     case AV_PIX_FMT_YUV444P:
305         s->chroma_format = CHROMA_444;
306         break;
307     case AV_PIX_FMT_YUVJ422P:
308     case AV_PIX_FMT_YUV422P:
309         s->chroma_format = CHROMA_422;
310         break;
311     case AV_PIX_FMT_YUVJ420P:
312     case AV_PIX_FMT_YUV420P:
313     default:
314         s->chroma_format = CHROMA_420;
315         break;
316     }
317
318     s->bit_rate = avctx->bit_rate;
319     s->width    = avctx->width;
320     s->height   = avctx->height;
321     if (avctx->gop_size > 600 &&
322         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
323         av_log(avctx, AV_LOG_WARNING,
324                "keyframe interval too large!, reducing it from %d to %d\n",
325                avctx->gop_size, 600);
326         avctx->gop_size = 600;
327     }
328     s->gop_size     = avctx->gop_size;
329     s->avctx        = avctx;
330     s->flags        = avctx->flags;
331     s->flags2       = avctx->flags2;
332     if (avctx->max_b_frames > MAX_B_FRAMES) {
333         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
334                "is %d.\n", MAX_B_FRAMES);
335         avctx->max_b_frames = MAX_B_FRAMES;
336     }
337     s->max_b_frames = avctx->max_b_frames;
338     s->codec_id     = avctx->codec->id;
339     s->strict_std_compliance = avctx->strict_std_compliance;
340     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
341     s->mpeg_quant         = avctx->mpeg_quant;
342     s->rtp_mode           = !!avctx->rtp_payload_size;
343     s->intra_dc_precision = avctx->intra_dc_precision;
344
345     // workaround some differences between how applications specify dc precision
346     if (s->intra_dc_precision < 0) {
347         s->intra_dc_precision += 8;
348     } else if (s->intra_dc_precision >= 8)
349         s->intra_dc_precision -= 8;
350
351     if (s->intra_dc_precision < 0) {
352         av_log(avctx, AV_LOG_ERROR,
353                 "intra dc precision must be positive, note some applications use"
354                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
355         return AVERROR(EINVAL);
356     }
357
358     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
359         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
360         return AVERROR(EINVAL);
361     }
362     s->user_specified_pts = AV_NOPTS_VALUE;
363
364     if (s->gop_size <= 1) {
365         s->intra_only = 1;
366         s->gop_size   = 12;
367     } else {
368         s->intra_only = 0;
369     }
370
371     s->me_method = avctx->me_method;
372
373     /* Fixed QSCALE */
374     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
375
376 #if FF_API_MPV_OPT
377     FF_DISABLE_DEPRECATION_WARNINGS
378     if (avctx->border_masking != 0.0)
379         s->border_masking = avctx->border_masking;
380     FF_ENABLE_DEPRECATION_WARNINGS
381 #endif
382
383     s->adaptive_quant = (s->avctx->lumi_masking ||
384                          s->avctx->dark_masking ||
385                          s->avctx->temporal_cplx_masking ||
386                          s->avctx->spatial_cplx_masking  ||
387                          s->avctx->p_masking      ||
388                          s->border_masking ||
389                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
390                         !s->fixed_qscale;
391
392     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
393
394     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
395         switch(avctx->codec_id) {
396         case AV_CODEC_ID_MPEG1VIDEO:
397         case AV_CODEC_ID_MPEG2VIDEO:
398             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
399             break;
400         case AV_CODEC_ID_MPEG4:
401         case AV_CODEC_ID_MSMPEG4V1:
402         case AV_CODEC_ID_MSMPEG4V2:
403         case AV_CODEC_ID_MSMPEG4V3:
404             if       (avctx->rc_max_rate >= 15000000) {
405                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
406             } else if(avctx->rc_max_rate >=  2000000) {
407                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
408             } else if(avctx->rc_max_rate >=   384000) {
409                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
410             } else
411                 avctx->rc_buffer_size = 40;
412             avctx->rc_buffer_size *= 16384;
413             break;
414         }
415         if (avctx->rc_buffer_size) {
416             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
417         }
418     }
419
420     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
421         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
422         return -1;
423     }
424
425     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
426         av_log(avctx, AV_LOG_INFO,
427                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
428     }
429
430     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
431         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
432         return -1;
433     }
434
435     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
436         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
437         return -1;
438     }
439
440     if (avctx->rc_max_rate &&
441         avctx->rc_max_rate == avctx->bit_rate &&
442         avctx->rc_max_rate != avctx->rc_min_rate) {
443         av_log(avctx, AV_LOG_INFO,
444                "impossible bitrate constraints, this will fail\n");
445     }
446
447     if (avctx->rc_buffer_size &&
448         avctx->bit_rate * (int64_t)avctx->time_base.num >
449             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
450         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
451         return -1;
452     }
453
454     if (!s->fixed_qscale &&
455         avctx->bit_rate * av_q2d(avctx->time_base) >
456             avctx->bit_rate_tolerance) {
457         av_log(avctx, AV_LOG_WARNING,
458                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
459         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
460     }
461
462     if (s->avctx->rc_max_rate &&
463         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
464         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
465          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
466         90000LL * (avctx->rc_buffer_size - 1) >
467             s->avctx->rc_max_rate * 0xFFFFLL) {
468         av_log(avctx, AV_LOG_INFO,
469                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
470                "specified vbv buffer is too large for the given bitrate!\n");
471     }
472
473     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
474         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
475         s->codec_id != AV_CODEC_ID_FLV1) {
476         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
477         return -1;
478     }
479
480     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
481         av_log(avctx, AV_LOG_ERROR,
482                "OBMC is only supported with simple mb decision\n");
483         return -1;
484     }
485
486     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
487         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
488         return -1;
489     }
490
491     if (s->max_b_frames                    &&
492         s->codec_id != AV_CODEC_ID_MPEG4      &&
493         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
494         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
495         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
496         return -1;
497     }
498     if (s->max_b_frames < 0) {
499         av_log(avctx, AV_LOG_ERROR,
500                "max b frames must be 0 or positive for mpegvideo based encoders\n");
501         return -1;
502     }
503
504     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
505          s->codec_id == AV_CODEC_ID_H263  ||
506          s->codec_id == AV_CODEC_ID_H263P) &&
507         (avctx->sample_aspect_ratio.num > 255 ||
508          avctx->sample_aspect_ratio.den > 255)) {
509         av_log(avctx, AV_LOG_WARNING,
510                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
511                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
512         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
513                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
514     }
515
516     if ((s->codec_id == AV_CODEC_ID_H263  ||
517          s->codec_id == AV_CODEC_ID_H263P) &&
518         (avctx->width  > 2048 ||
519          avctx->height > 1152 )) {
520         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
521         return -1;
522     }
523     if ((s->codec_id == AV_CODEC_ID_H263  ||
524          s->codec_id == AV_CODEC_ID_H263P) &&
525         ((avctx->width &3) ||
526          (avctx->height&3) )) {
527         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
528         return -1;
529     }
530
531     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
532         (avctx->width  > 4095 ||
533          avctx->height > 4095 )) {
534         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
535         return -1;
536     }
537
538     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
539         (avctx->width  > 16383 ||
540          avctx->height > 16383 )) {
541         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
542         return -1;
543     }
544
545     if (s->codec_id == AV_CODEC_ID_RV10 &&
546         (avctx->width &15 ||
547          avctx->height&15 )) {
548         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
549         return AVERROR(EINVAL);
550     }
551
552     if (s->codec_id == AV_CODEC_ID_RV20 &&
553         (avctx->width &3 ||
554          avctx->height&3 )) {
555         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
556         return AVERROR(EINVAL);
557     }
558
559     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
560          s->codec_id == AV_CODEC_ID_WMV2) &&
561          avctx->width & 1) {
562          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
563          return -1;
564     }
565
566     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
567         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
568         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
569         return -1;
570     }
571
572     // FIXME mpeg2 uses that too
573     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
574                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
575         av_log(avctx, AV_LOG_ERROR,
576                "mpeg2 style quantization not supported by codec\n");
577         return -1;
578     }
579
580     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
581         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
582         return -1;
583     }
584
585     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
586         s->avctx->mb_decision != FF_MB_DECISION_RD) {
587         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
588         return -1;
589     }
590
591     if (s->avctx->scenechange_threshold < 1000000000 &&
592         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
593         av_log(avctx, AV_LOG_ERROR,
594                "closed gop with scene change detection are not supported yet, "
595                "set threshold to 1000000000\n");
596         return -1;
597     }
598
599     if (s->flags & CODEC_FLAG_LOW_DELAY) {
600         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
601             av_log(avctx, AV_LOG_ERROR,
602                   "low delay forcing is only available for mpeg2\n");
603             return -1;
604         }
605         if (s->max_b_frames != 0) {
606             av_log(avctx, AV_LOG_ERROR,
607                    "b frames cannot be used with low delay\n");
608             return -1;
609         }
610     }
611
612     if (s->q_scale_type == 1) {
613         if (avctx->qmax > 12) {
614             av_log(avctx, AV_LOG_ERROR,
615                    "non linear quant only supports qmax <= 12 currently\n");
616             return -1;
617         }
618     }
619
620     if (s->avctx->thread_count > 1         &&
621         s->codec_id != AV_CODEC_ID_MPEG4      &&
622         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
623         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
624         s->codec_id != AV_CODEC_ID_MJPEG      &&
625         (s->codec_id != AV_CODEC_ID_H263P)) {
626         av_log(avctx, AV_LOG_ERROR,
627                "multi threaded encoding not supported by codec\n");
628         return -1;
629     }
630
631     if (s->avctx->thread_count < 1) {
632         av_log(avctx, AV_LOG_ERROR,
633                "automatic thread number detection not supported by codec, "
634                "patch welcome\n");
635         return -1;
636     }
637
638     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
639         s->rtp_mode = 1;
640
641     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
642         s->h263_slice_structured = 1;
643
644     if (!avctx->time_base.den || !avctx->time_base.num) {
645         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
646         return -1;
647     }
648
649     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
650         av_log(avctx, AV_LOG_INFO,
651                "notice: b_frame_strategy only affects the first pass\n");
652         avctx->b_frame_strategy = 0;
653     }
654
655     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
656     if (i > 1) {
657         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
658         avctx->time_base.den /= i;
659         avctx->time_base.num /= i;
660         //return -1;
661     }
662
663     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
664         // (a + x * 3 / 8) / x
665         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
666         s->inter_quant_bias = 0;
667     } else {
668         s->intra_quant_bias = 0;
669         // (a - x / 4) / x
670         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
671     }
672
673     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
674         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
675         return AVERROR(EINVAL);
676     }
677
678     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
679         s->intra_quant_bias = avctx->intra_quant_bias;
680     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
681         s->inter_quant_bias = avctx->inter_quant_bias;
682
683     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
684
685     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
686         s->avctx->time_base.den > (1 << 16) - 1) {
687         av_log(avctx, AV_LOG_ERROR,
688                "timebase %d/%d not supported by MPEG 4 standard, "
689                "the maximum admitted value for the timebase denominator "
690                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
691                (1 << 16) - 1);
692         return -1;
693     }
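    /* Number of bits needed to code time_base.den - 1; MPEG-4 uses this
     * width for vop_time_increment in the headers written later. */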
694     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
695
696     switch (avctx->codec->id) {
697     case AV_CODEC_ID_MPEG1VIDEO:
698         s->out_format = FMT_MPEG1;
699         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
700         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
701         break;
702     case AV_CODEC_ID_MPEG2VIDEO:
703         s->out_format = FMT_MPEG1;
704         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
705         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
706         s->rtp_mode   = 1;
707         break;
708     case AV_CODEC_ID_MJPEG:
709     case AV_CODEC_ID_AMV:
710         s->out_format = FMT_MJPEG;
711         s->intra_only = 1; /* force intra only for jpeg */
712         if (!CONFIG_MJPEG_ENCODER ||
713             ff_mjpeg_encode_init(s) < 0)
714             return -1;
715         avctx->delay = 0;
716         s->low_delay = 1;
717         break;
718     case AV_CODEC_ID_H261:
719         if (!CONFIG_H261_ENCODER)
720             return -1;
721         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
722             av_log(avctx, AV_LOG_ERROR,
723                    "The specified picture size of %dx%d is not valid for the "
724                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
725                     s->width, s->height);
726             return -1;
727         }
728         s->out_format = FMT_H261;
729         avctx->delay  = 0;
730         s->low_delay  = 1;
731         s->rtp_mode   = 0; /* Sliced encoding not supported */
732         break;
733     case AV_CODEC_ID_H263:
734         if (!CONFIG_H263_ENCODER)
735             return -1;
736         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
737                              s->width, s->height) == 8) {
738             av_log(avctx, AV_LOG_ERROR,
739                    "The specified picture size of %dx%d is not valid for "
740                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
741                    "352x288, 704x576, and 1408x1152. "
742                    "Try H.263+.\n", s->width, s->height);
743             return -1;
744         }
745         s->out_format = FMT_H263;
746         avctx->delay  = 0;
747         s->low_delay  = 1;
748         break;
749     case AV_CODEC_ID_H263P:
750         s->out_format = FMT_H263;
751         s->h263_plus  = 1;
752         /* Fx */
753         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
754         s->modified_quant  = s->h263_aic;
755         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
756         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
757
758         /* /Fx */
759         /* These are just to be sure */
760         avctx->delay = 0;
761         s->low_delay = 1;
762         break;
763     case AV_CODEC_ID_FLV1:
764         s->out_format      = FMT_H263;
765         s->h263_flv        = 2; /* format = 1; 11-bit codes */
766         s->unrestricted_mv = 1;
767         s->rtp_mode  = 0; /* don't allow GOB */
768         avctx->delay = 0;
769         s->low_delay = 1;
770         break;
771     case AV_CODEC_ID_RV10:
772         s->out_format = FMT_H263;
773         avctx->delay  = 0;
774         s->low_delay  = 1;
775         break;
776     case AV_CODEC_ID_RV20:
777         s->out_format      = FMT_H263;
778         avctx->delay       = 0;
779         s->low_delay       = 1;
780         s->modified_quant  = 1;
781         s->h263_aic        = 1;
782         s->h263_plus       = 1;
783         s->loop_filter     = 1;
784         s->unrestricted_mv = 0;
785         break;
786     case AV_CODEC_ID_MPEG4:
787         s->out_format      = FMT_H263;
788         s->h263_pred       = 1;
789         s->unrestricted_mv = 1;
790         s->low_delay       = s->max_b_frames ? 0 : 1;
791         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
792         break;
793     case AV_CODEC_ID_MSMPEG4V2:
794         s->out_format      = FMT_H263;
795         s->h263_pred       = 1;
796         s->unrestricted_mv = 1;
797         s->msmpeg4_version = 2;
798         avctx->delay       = 0;
799         s->low_delay       = 1;
800         break;
801     case AV_CODEC_ID_MSMPEG4V3:
802         s->out_format        = FMT_H263;
803         s->h263_pred         = 1;
804         s->unrestricted_mv   = 1;
805         s->msmpeg4_version   = 3;
806         s->flipflop_rounding = 1;
807         avctx->delay         = 0;
808         s->low_delay         = 1;
809         break;
810     case AV_CODEC_ID_WMV1:
811         s->out_format        = FMT_H263;
812         s->h263_pred         = 1;
813         s->unrestricted_mv   = 1;
814         s->msmpeg4_version   = 4;
815         s->flipflop_rounding = 1;
816         avctx->delay         = 0;
817         s->low_delay         = 1;
818         break;
819     case AV_CODEC_ID_WMV2:
820         s->out_format        = FMT_H263;
821         s->h263_pred         = 1;
822         s->unrestricted_mv   = 1;
823         s->msmpeg4_version   = 5;
824         s->flipflop_rounding = 1;
825         avctx->delay         = 0;
826         s->low_delay         = 1;
827         break;
828     default:
829         return -1;
830     }
831
832     avctx->has_b_frames = !s->low_delay;
833
834     s->encoding = 1;
835
836     s->progressive_frame    =
837     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
838                                                 CODEC_FLAG_INTERLACED_ME) ||
839                                 s->alternate_scan);
840
841     /* init */
842     ff_mpv_idct_init(s);
843     if (ff_mpv_common_init(s) < 0)
844         return -1;
845
846     ff_fdctdsp_init(&s->fdsp, avctx);
847     ff_me_cmp_init(&s->mecc, avctx);
848     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
849     ff_pixblockdsp_init(&s->pdsp, avctx);
850     ff_qpeldsp_init(&s->qdsp);
851
852     s->avctx->coded_frame = s->current_picture.f;
853
854     if (s->msmpeg4_version) {
855         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
856                           2 * 2 * (MAX_LEVEL + 1) *
857                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
858     }
859     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
860
861     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
862     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
863     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
864     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
865     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
866     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
867     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
868                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
869     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
870                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
871
872     if (s->avctx->noise_reduction) {
873         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
874                           2 * 64 * sizeof(uint16_t), fail);
875     }
876
877     ff_dct_encode_init(s);
878
879     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
880         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
881
882     s->quant_precision = 5;
883
884     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
885     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
886
887     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
888         ff_h261_encode_init(s);
889     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
890         ff_h263_encode_init(s);
891     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
892         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
893             return ret;
894     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
895         && s->out_format == FMT_MPEG1)
896         ff_mpeg1_encode_init(s);
897
898     /* init q matrix */
899     for (i = 0; i < 64; i++) {
900         int j = s->idsp.idct_permutation[i];
901         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
902             s->mpeg_quant) {
903             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
904             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
905         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
906             s->intra_matrix[j] =
907             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
908         } else {
909             /* mpeg1/2 */
910             s->chroma_intra_matrix[j] =
911             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
912             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
913         }
914         if (s->avctx->intra_matrix)
915             s->intra_matrix[j] = s->avctx->intra_matrix[i];
916         if (s->avctx->inter_matrix)
917             s->inter_matrix[j] = s->avctx->inter_matrix[i];
918     }
919
920     /* precompute matrix */
921     /* for mjpeg, we do include qscale in the matrix */
922     if (s->out_format != FMT_MJPEG) {
923         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
924                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
925                           31, 1);
926         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
927                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
928                           31, 0);
929     }
930
931     if (ff_rate_control_init(s) < 0)
932         return -1;
933
934 #if FF_API_ERROR_RATE
935     FF_DISABLE_DEPRECATION_WARNINGS
936     if (avctx->error_rate)
937         s->error_rate = avctx->error_rate;
938     FF_ENABLE_DEPRECATION_WARNINGS;
939 #endif
940
941 #if FF_API_NORMALIZE_AQP
942     FF_DISABLE_DEPRECATION_WARNINGS
943     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
944         s->mpv_flags |= FF_MPV_FLAG_NAQ;
945     FF_ENABLE_DEPRECATION_WARNINGS;
946 #endif
947
948 #if FF_API_MV0
949     FF_DISABLE_DEPRECATION_WARNINGS
950     if (avctx->flags & CODEC_FLAG_MV0)
951         s->mpv_flags |= FF_MPV_FLAG_MV0;
952     FF_ENABLE_DEPRECATION_WARNINGS
953 #endif
954
955 #if FF_API_MPV_OPT
956     FF_DISABLE_DEPRECATION_WARNINGS
957     if (avctx->rc_qsquish != 0.0)
958         s->rc_qsquish = avctx->rc_qsquish;
959     if (avctx->rc_qmod_amp != 0.0)
960         s->rc_qmod_amp = avctx->rc_qmod_amp;
961     if (avctx->rc_qmod_freq)
962         s->rc_qmod_freq = avctx->rc_qmod_freq;
963     if (avctx->rc_buffer_aggressivity != 1.0)
964         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
965     if (avctx->rc_initial_cplx != 0.0)
966         s->rc_initial_cplx = avctx->rc_initial_cplx;
967     if (avctx->lmin)
968         s->lmin = avctx->lmin;
969     if (avctx->lmax)
970         s->lmax = avctx->lmax;
971
972     if (avctx->rc_eq) {
973         av_freep(&s->rc_eq);
974         s->rc_eq = av_strdup(avctx->rc_eq);
975         if (!s->rc_eq)
976             return AVERROR(ENOMEM);
977     }
978     FF_ENABLE_DEPRECATION_WARNINGS
979 #endif
980
981     if (avctx->b_frame_strategy == 2) {
982         for (i = 0; i < s->max_b_frames + 2; i++) {
983             s->tmp_frames[i] = av_frame_alloc();
984             if (!s->tmp_frames[i])
985                 return AVERROR(ENOMEM);
986
987             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
988             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
989             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
990
991             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
992             if (ret < 0)
993                 return ret;
994         }
995     }
996
997     return 0;
998 fail:
999     ff_mpv_encode_end(avctx);
1000     return AVERROR_UNKNOWN;
1001 }
1002
1003 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
1004 {
1005     MpegEncContext *s = avctx->priv_data;
1006     int i;
1007
1008     ff_rate_control_uninit(s);
1009
1010     ff_mpv_common_end(s);
1011     if (CONFIG_MJPEG_ENCODER &&
1012         s->out_format == FMT_MJPEG)
1013         ff_mjpeg_encode_close(s);
1014
1015     av_freep(&avctx->extradata);
1016
1017     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1018         av_frame_free(&s->tmp_frames[i]);
1019
1020     ff_free_picture_tables(&s->new_picture);
1021     ff_mpeg_unref_picture(s, &s->new_picture);
1022
1023     av_freep(&s->avctx->stats_out);
1024     av_freep(&s->ac_stats);
1025
1026     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
1027     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1028     s->q_chroma_intra_matrix=   NULL;
1029     s->q_chroma_intra_matrix16= NULL;
1030     av_freep(&s->q_intra_matrix);
1031     av_freep(&s->q_inter_matrix);
1032     av_freep(&s->q_intra_matrix16);
1033     av_freep(&s->q_inter_matrix16);
1034     av_freep(&s->input_picture);
1035     av_freep(&s->reordered_input_picture);
1036     av_freep(&s->dct_offset);
1037
1038     return 0;
1039 }
1040
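/* Sum of absolute differences of a 16x16 block against a constant reference
 * value (typically the block mean); a low result means the block is nearly flat. */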
1041 static int get_sae(uint8_t *src, int ref, int stride)
1042 {
1043     int x,y;
1044     int acc = 0;
1045
1046     for (y = 0; y < 16; y++) {
1047         for (x = 0; x < 16; x++) {
1048             acc += FFABS(src[x + y * stride] - ref);
1049         }
1050     }
1051
1052     return acc;
1053 }
1054
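/* Count the 16x16 blocks for which coding against their own mean (intra)
 * looks clearly cheaper than coding against the reference frame (inter).
 * Used by b_frame_strategy 1 to score candidate B-frames. */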
1055 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1056                            uint8_t *ref, int stride)
1057 {
1058     int x, y, w, h;
1059     int acc = 0;
1060
1061     w = s->width  & ~15;
1062     h = s->height & ~15;
1063
1064     for (y = 0; y < h; y += 16) {
1065         for (x = 0; x < w; x += 16) {
1066             int offset = x + y * stride;
1067             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1068                                       stride, 16);
1069             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1070             int sae  = get_sae(src + offset, mean, stride);
1071
1072             acc += sae + 500 < sad;
1073         }
1074     }
1075     return acc;
1076 }
1077
1078
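/* Queue one user-supplied frame for encoding.  If the frame's strides and
 * alignment match the encoder's, it is referenced directly ("direct" mode);
 * otherwise it is copied into an internal picture and its borders are padded.
 * The picture is stored encoding_delay slots ahead in the input FIFO and its
 * pts is validated or guessed as needed. */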
1079 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1080 {
1081     Picture *pic = NULL;
1082     int64_t pts;
1083     int i, display_picture_number = 0, ret;
1084     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1085                                                  (s->low_delay ? 0 : 1);
1086     int direct = 1;
1087
1088     if (pic_arg) {
1089         pts = pic_arg->pts;
1090         display_picture_number = s->input_picture_number++;
1091
1092         if (pts != AV_NOPTS_VALUE) {
1093             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1094                 int64_t last = s->user_specified_pts;
1095
1096                 if (pts <= last) {
1097                     av_log(s->avctx, AV_LOG_ERROR,
1098                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1099                            pts, last);
1100                     return AVERROR(EINVAL);
1101                 }
1102
1103                 if (!s->low_delay && display_picture_number == 1)
1104                     s->dts_delta = pts - last;
1105             }
1106             s->user_specified_pts = pts;
1107         } else {
1108             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1109                 s->user_specified_pts =
1110                 pts = s->user_specified_pts + 1;
1111                 av_log(s->avctx, AV_LOG_INFO,
1112                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1113                        pts);
1114             } else {
1115                 pts = display_picture_number;
1116             }
1117         }
1118     }
1119
1120     if (pic_arg) {
1121         if (!pic_arg->buf[0] ||
1122             pic_arg->linesize[0] != s->linesize ||
1123             pic_arg->linesize[1] != s->uvlinesize ||
1124             pic_arg->linesize[2] != s->uvlinesize)
1125             direct = 0;
1126         if ((s->width & 15) || (s->height & 15))
1127             direct = 0;
1128         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1129             direct = 0;
1130         if (s->linesize & (STRIDE_ALIGN-1))
1131             direct = 0;
1132
1133         av_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1134                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1135
1136         i = ff_find_unused_picture(s, direct);
1137         if (i < 0)
1138             return i;
1139
1140         pic = &s->picture[i];
1141         pic->reference = 3;
1142
1143         if (direct) {
1144             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1145                 return ret;
1146             if (ff_alloc_picture(s, pic, 1) < 0) {
1147                 return -1;
1148             }
1149         } else {
1150             if (ff_alloc_picture(s, pic, 0) < 0) {
1151                 return -1;
1152             }
1153
1154             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1155                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1156                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1157                 // empty
1158             } else {
1159                 int h_chroma_shift, v_chroma_shift;
1160                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1161                                                  &h_chroma_shift,
1162                                                  &v_chroma_shift);
1163
1164                 for (i = 0; i < 3; i++) {
1165                     int src_stride = pic_arg->linesize[i];
1166                     int dst_stride = i ? s->uvlinesize : s->linesize;
1167                     int h_shift = i ? h_chroma_shift : 0;
1168                     int v_shift = i ? v_chroma_shift : 0;
1169                     int w = s->width  >> h_shift;
1170                     int h = s->height >> v_shift;
1171                     uint8_t *src = pic_arg->data[i];
1172                     uint8_t *dst = pic->f->data[i];
1173                     int vpad = 16;
1174
1175                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1176                         && !s->progressive_sequence
1177                         && FFALIGN(s->height, 32) - s->height > 16)
1178                         vpad = 32;
1179
1180                     if (!s->avctx->rc_buffer_size)
1181                         dst += INPLACE_OFFSET;
1182
1183                     if (src_stride == dst_stride)
1184                         memcpy(dst, src, src_stride * h);
1185                     else {
1186                         int h2 = h;
1187                         uint8_t *dst2 = dst;
1188                         while (h2--) {
1189                             memcpy(dst2, src, w);
1190                             dst2 += dst_stride;
1191                             src += src_stride;
1192                         }
1193                     }
1194                     if ((s->width & 15) || (s->height & (vpad-1))) {
1195                         s->mpvencdsp.draw_edges(dst, dst_stride,
1196                                                 w, h,
1197                                                 16 >> h_shift,
1198                                                 vpad >> v_shift,
1199                                                 EDGE_BOTTOM);
1200                     }
1201                 }
1202             }
1203         }
1204         ret = av_frame_copy_props(pic->f, pic_arg);
1205         if (ret < 0)
1206             return ret;
1207
1208         pic->f->display_picture_number = display_picture_number;
1209         pic->f->pts = pts; // we set this here to avoid modifying pic_arg
1210     }
1211
1212     /* shift buffer entries */
1213     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1214         s->input_picture[i - 1] = s->input_picture[i];
1215
1216     s->input_picture[encoding_delay] = (Picture*) pic;
1217
1218     return 0;
1219 }
1220
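/* Decide whether the candidate frame may be skipped: compare it against the
 * last coded reference per 8x8 block with frame_skip_cmp, combine the block
 * scores according to frame_skip_exp, and return 1 if the result is below
 * frame_skip_threshold or below frame_skip_factor scaled by the current lambda. */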
1221 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1222 {
1223     int x, y, plane;
1224     int score = 0;
1225     int64_t score64 = 0;
1226
1227     for (plane = 0; plane < 3; plane++) {
1228         const int stride = p->f->linesize[plane];
1229         const int bw = plane ? 1 : 2;
1230         for (y = 0; y < s->mb_height * bw; y++) {
1231             for (x = 0; x < s->mb_width * bw; x++) {
1232                 int off = p->shared ? 0 : 16;
1233                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1234                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1235                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1236
1237                 switch (FFABS(s->avctx->frame_skip_exp)) {
1238                 case 0: score    =  FFMAX(score, v);          break;
1239                 case 1: score   += FFABS(v);                  break;
1240                 case 2: score64 += v * (int64_t)v;                       break;
1241                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1242                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1243                 }
1244             }
1245         }
1246     }
1247     emms_c();
1248
1249     if (score)
1250         score64 = score;
1251     if (s->avctx->frame_skip_exp < 0)
1252         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1253                       -1.0/s->avctx->frame_skip_exp);
1254
1255     if (score64 < s->avctx->frame_skip_threshold)
1256         return 1;
1257     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1258         return 1;
1259     return 0;
1260 }
1261
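/* Helper for estimate_best_b_count(): encode one frame (or flush with NULL)
 * using the temporary context and return the resulting packet size in bytes. */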
1262 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1263 {
1264     AVPacket pkt = { 0 };
1265     int ret, got_output;
1266
1267     av_init_packet(&pkt);
1268     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1269     if (ret < 0)
1270         return ret;
1271
1272     ret = pkt.size;
1273     av_free_packet(&pkt);
1274     return ret;
1275 }
1276
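/* b_frame_strategy 2: encode downscaled copies of the queued input pictures
 * with every candidate number of consecutive B-frames (0..max_b_frames) using
 * a throwaway encoder context, and return the count with the lowest
 * rate-distortion cost (bits weighted by lambda2 plus the reconstruction error). */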
1277 static int estimate_best_b_count(MpegEncContext *s)
1278 {
1279     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1280     AVCodecContext *c = avcodec_alloc_context3(NULL);
1281     const int scale = s->avctx->brd_scale;
1282     int i, j, out_size, p_lambda, b_lambda, lambda2;
1283     int64_t best_rd  = INT64_MAX;
1284     int best_b_count = -1;
1285
1286     if (!c)
1287         return AVERROR(ENOMEM);
1288     av_assert0(scale >= 0 && scale <= 3);
1289
1290     //emms_c();
1291     //s->next_picture_ptr->quality;
1292     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1293     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1294     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1295     if (!b_lambda) // FIXME we should do this somewhere else
1296         b_lambda = p_lambda;
1297     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1298                FF_LAMBDA_SHIFT;
1299
1300     c->width        = s->width  >> scale;
1301     c->height       = s->height >> scale;
1302     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1303     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1304     c->mb_decision  = s->avctx->mb_decision;
1305     c->me_cmp       = s->avctx->me_cmp;
1306     c->mb_cmp       = s->avctx->mb_cmp;
1307     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1308     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1309     c->time_base    = s->avctx->time_base;
1310     c->max_b_frames = s->max_b_frames;
1311
1312     if (avcodec_open2(c, codec, NULL) < 0)
1313         return -1;
1314
1315     for (i = 0; i < s->max_b_frames + 2; i++) {
1316         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1317                                                 s->next_picture_ptr;
1318         uint8_t *data[4];
1319
1320         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1321             pre_input = *pre_input_ptr;
1322             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1323
1324             if (!pre_input.shared && i) {
1325                 data[0] += INPLACE_OFFSET;
1326                 data[1] += INPLACE_OFFSET;
1327                 data[2] += INPLACE_OFFSET;
1328             }
1329
1330             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1331                                        s->tmp_frames[i]->linesize[0],
1332                                        data[0],
1333                                        pre_input.f->linesize[0],
1334                                        c->width, c->height);
1335             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1336                                        s->tmp_frames[i]->linesize[1],
1337                                        data[1],
1338                                        pre_input.f->linesize[1],
1339                                        c->width >> 1, c->height >> 1);
1340             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1341                                        s->tmp_frames[i]->linesize[2],
1342                                        data[2],
1343                                        pre_input.f->linesize[2],
1344                                        c->width >> 1, c->height >> 1);
1345         }
1346     }
1347
1348     for (j = 0; j < s->max_b_frames + 1; j++) {
1349         int64_t rd = 0;
1350
1351         if (!s->input_picture[j])
1352             break;
1353
1354         c->error[0] = c->error[1] = c->error[2] = 0;
1355
1356         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1357         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1358
1359         out_size = encode_frame(c, s->tmp_frames[0]);
1360
1361         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1362
1363         for (i = 0; i < s->max_b_frames + 1; i++) {
1364             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1365
1366             s->tmp_frames[i + 1]->pict_type = is_p ?
1367                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1368             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1369
1370             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1371
1372             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1373         }
1374
1375         /* get the delayed frames */
1376         while (out_size) {
1377             out_size = encode_frame(c, NULL);
1378             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1379         }
1380
1381         rd += c->error[0] + c->error[1] + c->error[2];
1382
1383         if (rd < best_rd) {
1384             best_rd = rd;
1385             best_b_count = j;
1386         }
1387     }
1388
1389     avcodec_close(c);
1390     av_freep(&c);
1391
1392     return best_b_count;
1393 }
1394
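/* Pick the next picture(s) to code: apply frame skipping, choose the I/P/B
 * pattern according to gop_size and the selected b_frame_strategy, and move
 * the result into reordered_input_picture[] in coding order. */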
1395 static int select_input_picture(MpegEncContext *s)
1396 {
1397     int i, ret;
1398
1399     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1400         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1401     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1402
1403     /* set next picture type & ordering */
1404     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1405         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1406             if (s->picture_in_gop_number < s->gop_size &&
1407                 s->next_picture_ptr &&
1408                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1409                 // FIXME check that the gop check above is +-1 correct
1410                 av_frame_unref(s->input_picture[0]->f);
1411
1412                 ff_vbv_update(s, 0);
1413
1414                 goto no_output_pic;
1415             }
1416         }
1417
1418         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1419             !s->next_picture_ptr || s->intra_only) {
1420             s->reordered_input_picture[0] = s->input_picture[0];
1421             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1422             s->reordered_input_picture[0]->f->coded_picture_number =
1423                 s->coded_picture_number++;
1424         } else {
1425             int b_frames;
1426
1427             if (s->flags & CODEC_FLAG_PASS2) {
1428                 for (i = 0; i < s->max_b_frames + 1; i++) {
1429                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1430
1431                     if (pict_num >= s->rc_context.num_entries)
1432                         break;
1433                     if (!s->input_picture[i]) {
1434                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1435                         break;
1436                     }
1437
1438                     s->input_picture[i]->f->pict_type =
1439                         s->rc_context.entry[pict_num].new_pict_type;
1440                 }
1441             }
1442
1443             if (s->avctx->b_frame_strategy == 0) {
1444                 b_frames = s->max_b_frames;
1445                 while (b_frames && !s->input_picture[b_frames])
1446                     b_frames--;
1447             } else if (s->avctx->b_frame_strategy == 1) {
1448                 for (i = 1; i < s->max_b_frames + 1; i++) {
1449                     if (s->input_picture[i] &&
1450                         s->input_picture[i]->b_frame_score == 0) {
1451                         s->input_picture[i]->b_frame_score =
1452                             get_intra_count(s,
1453                                             s->input_picture[i    ]->f->data[0],
1454                                             s->input_picture[i - 1]->f->data[0],
1455                                             s->linesize) + 1;
1456                     }
1457                 }
1458                 for (i = 0; i < s->max_b_frames + 1; i++) {
1459                     if (!s->input_picture[i] ||
1460                         s->input_picture[i]->b_frame_score - 1 >
1461                             s->mb_num / s->avctx->b_sensitivity)
1462                         break;
1463                 }
1464
1465                 b_frames = FFMAX(0, i - 1);
1466
1467                 /* reset scores */
1468                 for (i = 0; i < b_frames + 1; i++) {
1469                     s->input_picture[i]->b_frame_score = 0;
1470                 }
1471             } else if (s->avctx->b_frame_strategy == 2) {
1472                 b_frames = estimate_best_b_count(s);
1473             } else {
1474                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1475                 b_frames = 0;
1476             }
1477
1478             emms_c();
1479
1480             for (i = b_frames - 1; i >= 0; i--) {
1481                 int type = s->input_picture[i]->f->pict_type;
1482                 if (type && type != AV_PICTURE_TYPE_B)
1483                     b_frames = i;
1484             }
1485             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1486                 b_frames == s->max_b_frames) {
1487                 av_log(s->avctx, AV_LOG_ERROR,
1488                        "warning, too many b frames in a row\n");
1489             }
1490
1491             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1492                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1493                     s->gop_size > s->picture_in_gop_number) {
1494                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1495                 } else {
1496                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1497                         b_frames = 0;
1498                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1499                 }
1500             }
1501
1502             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1503                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1504                 b_frames--;
1505
1506             s->reordered_input_picture[0] = s->input_picture[b_frames];
1507             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1508                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1509             s->reordered_input_picture[0]->f->coded_picture_number =
1510                 s->coded_picture_number++;
1511             for (i = 0; i < b_frames; i++) {
1512                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1513                 s->reordered_input_picture[i + 1]->f->pict_type =
1514                     AV_PICTURE_TYPE_B;
1515                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1516                     s->coded_picture_number++;
1517             }
1518         }
1519     }
1520 no_output_pic:
1521     if (s->reordered_input_picture[0]) {
1522         s->reordered_input_picture[0]->reference =
1523            s->reordered_input_picture[0]->f->pict_type !=
1524                AV_PICTURE_TYPE_B ? 3 : 0;
1525
1526         ff_mpeg_unref_picture(s, &s->new_picture);
1527         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1528             return ret;
1529
1530         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1531             // input is a shared pix, so we can't modify it -> alloc a new
1532             // one & ensure that the shared one is reusable
1533
1534             Picture *pic;
1535             int i = ff_find_unused_picture(s, 0);
1536             if (i < 0)
1537                 return i;
1538             pic = &s->picture[i];
1539
1540             pic->reference = s->reordered_input_picture[0]->reference;
1541             if (ff_alloc_picture(s, pic, 0) < 0) {
1542                 return -1;
1543             }
1544
1545             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1546             if (ret < 0)
1547                 return ret;
1548
1549             /* mark us unused / free shared pic */
1550             av_frame_unref(s->reordered_input_picture[0]->f);
1551             s->reordered_input_picture[0]->shared = 0;
1552
1553             s->current_picture_ptr = pic;
1554         } else {
1555             // input is not a shared pix -> reuse buffer for current_pix
1556             s->current_picture_ptr = s->reordered_input_picture[0];
1557             for (i = 0; i < 4; i++) {
1558                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1559             }
1560         }
1561         ff_mpeg_unref_picture(s, &s->current_picture);
1562         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1563                                        s->current_picture_ptr)) < 0)
1564             return ret;
1565
1566         s->picture_number = s->new_picture.f->display_picture_number;
1567     } else {
1568         ff_mpeg_unref_picture(s, &s->new_picture);
1569     }
1570     return 0;
1571 }
1572
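/**
 * Finish the current frame: pad the edges of the reconstructed reference
 * picture and update the per-frame state (last picture type, lambda) used
 * when coding the following frames.
 */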
1573 static void frame_end(MpegEncContext *s)
1574 {
1575     if (s->unrestricted_mv &&
1576         s->current_picture.reference &&
1577         !s->intra_only) {
1578         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1579         int hshift = desc->log2_chroma_w;
1580         int vshift = desc->log2_chroma_h;
1581         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1582                                 s->current_picture.f->linesize[0],
1583                                 s->h_edge_pos, s->v_edge_pos,
1584                                 EDGE_WIDTH, EDGE_WIDTH,
1585                                 EDGE_TOP | EDGE_BOTTOM);
1586         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1587                                 s->current_picture.f->linesize[1],
1588                                 s->h_edge_pos >> hshift,
1589                                 s->v_edge_pos >> vshift,
1590                                 EDGE_WIDTH >> hshift,
1591                                 EDGE_WIDTH >> vshift,
1592                                 EDGE_TOP | EDGE_BOTTOM);
1593         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1594                                 s->current_picture.f->linesize[2],
1595                                 s->h_edge_pos >> hshift,
1596                                 s->v_edge_pos >> vshift,
1597                                 EDGE_WIDTH >> hshift,
1598                                 EDGE_WIDTH >> vshift,
1599                                 EDGE_TOP | EDGE_BOTTOM);
1600     }
1601
1602     emms_c();
1603
1604     s->last_pict_type                 = s->pict_type;
1605     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1606     if (s->pict_type != AV_PICTURE_TYPE_B)
1607         s->last_non_b_pict_type = s->pict_type;
1608
1609     s->avctx->coded_frame = s->current_picture_ptr->f;
1610
1611 }
1612
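/**
 * Refresh the noise-reduction DCT offset tables from the accumulated error
 * statistics (roughly noise_reduction * dct_count / dct_error_sum per
 * coefficient), halving the counters once they grow large.
 */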
1613 static void update_noise_reduction(MpegEncContext *s)
1614 {
1615     int intra, i;
1616
1617     for (intra = 0; intra < 2; intra++) {
1618         if (s->dct_count[intra] > (1 << 16)) {
1619             for (i = 0; i < 64; i++) {
1620                 s->dct_error_sum[intra][i] >>= 1;
1621             }
1622             s->dct_count[intra] >>= 1;
1623         }
1624
1625         for (i = 0; i < 64; i++) {
1626             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1627                                        s->dct_count[intra] +
1628                                        s->dct_error_sum[intra][i] / 2) /
1629                                       (s->dct_error_sum[intra][i] + 1);
1630         }
1631     }
1632 }
1633
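/**
 * Prepare encoding of a new frame: rotate the last/next/current reference
 * pictures, adjust line sizes for field pictures, select the dequantization
 * functions and update the noise-reduction tables if enabled.
 */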
1634 static int frame_start(MpegEncContext *s)
1635 {
1636     int ret;
1637
1638     /* mark & release old frames */
1639     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1640         s->last_picture_ptr != s->next_picture_ptr &&
1641         s->last_picture_ptr->f->buf[0]) {
1642         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1643     }
1644
1645     s->current_picture_ptr->f->pict_type = s->pict_type;
1646     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1647
1648     ff_mpeg_unref_picture(s, &s->current_picture);
1649     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1650                                    s->current_picture_ptr)) < 0)
1651         return ret;
1652
1653     if (s->pict_type != AV_PICTURE_TYPE_B) {
1654         s->last_picture_ptr = s->next_picture_ptr;
1655         if (!s->droppable)
1656             s->next_picture_ptr = s->current_picture_ptr;
1657     }
1658
1659     if (s->last_picture_ptr) {
1660         ff_mpeg_unref_picture(s, &s->last_picture);
1661         if (s->last_picture_ptr->f->buf[0] &&
1662             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1663                                        s->last_picture_ptr)) < 0)
1664             return ret;
1665     }
1666     if (s->next_picture_ptr) {
1667         ff_mpeg_unref_picture(s, &s->next_picture);
1668         if (s->next_picture_ptr->f->buf[0] &&
1669             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1670                                        s->next_picture_ptr)) < 0)
1671             return ret;
1672     }
1673
1674     if (s->picture_structure != PICT_FRAME) {
1675         int i;
1676         for (i = 0; i < 4; i++) {
1677             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1678                 s->current_picture.f->data[i] +=
1679                     s->current_picture.f->linesize[i];
1680             }
1681             s->current_picture.f->linesize[i] *= 2;
1682             s->last_picture.f->linesize[i]    *= 2;
1683             s->next_picture.f->linesize[i]    *= 2;
1684         }
1685     }
1686
1687     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1688         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1689         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1690     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1691         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1692         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1693     } else {
1694         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1695         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1696     }
1697
1698     if (s->dct_error_sum) {
1699         av_assert2(s->avctx->noise_reduction && s->encoding);
1700         update_noise_reduction(s);
1701     }
1702
1703     return 0;
1704 }
1705
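/**
 * Encode one frame: buffer the input picture, select the picture to code,
 * run the encoder (retrying with a larger quantizer if the VBV buffer would
 * overflow), apply stuffing and the MPEG-1/2 vbv_delay fixup, and fill the
 * output packet.
 */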
1706 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1707                           const AVFrame *pic_arg, int *got_packet)
1708 {
1709     MpegEncContext *s = avctx->priv_data;
1710     int i, stuffing_count, ret;
1711     int context_count = s->slice_context_count;
1712
1713     s->picture_in_gop_number++;
1714
1715     if (load_input_picture(s, pic_arg) < 0)
1716         return -1;
1717
1718     if (select_input_picture(s) < 0) {
1719         return -1;
1720     }
1721
1722     /* output? */
1723     if (s->new_picture.f->data[0]) {
1724         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1725         int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - FF_INPUT_BUFFER_PADDING_SIZE
1726                                               :
1727                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1728         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
1729             return ret;
1730         if (s->mb_info) {
1731             s->mb_info_ptr = av_packet_new_side_data(pkt,
1732                                  AV_PKT_DATA_H263_MB_INFO,
1733                                  s->mb_width*s->mb_height*12);
1734             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1735         }
1736
1737         for (i = 0; i < context_count; i++) {
1738             int start_y = s->thread_context[i]->start_mb_y;
1739             int   end_y = s->thread_context[i]->  end_mb_y;
1740             int h       = s->mb_height;
1741             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1742             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1743
1744             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1745         }
1746
1747         s->pict_type = s->new_picture.f->pict_type;
1748         //emms_c();
1749         ret = frame_start(s);
1750         if (ret < 0)
1751             return ret;
1752 vbv_retry:
1753         ret = encode_picture(s, s->picture_number);
1754         if (growing_buffer) {
1755             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1756             pkt->data = s->pb.buf;
1757             pkt->size = avctx->internal->byte_buffer_size;
1758         }
1759         if (ret < 0)
1760             return -1;
1761
1762         avctx->header_bits = s->header_bits;
1763         avctx->mv_bits     = s->mv_bits;
1764         avctx->misc_bits   = s->misc_bits;
1765         avctx->i_tex_bits  = s->i_tex_bits;
1766         avctx->p_tex_bits  = s->p_tex_bits;
1767         avctx->i_count     = s->i_count;
1768         // FIXME f/b_count in avctx
1769         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1770         avctx->skip_count  = s->skip_count;
1771
1772         frame_end(s);
1773
1774         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1775             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1776
1777         if (avctx->rc_buffer_size) {
1778             RateControlContext *rcc = &s->rc_context;
1779             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1780
1781             if (put_bits_count(&s->pb) > max_size &&
1782                 s->lambda < s->lmax) {
1783                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1784                                        (s->qscale + 1) / s->qscale);
1785                 if (s->adaptive_quant) {
1786                     int i;
1787                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1788                         s->lambda_table[i] =
1789                             FFMAX(s->lambda_table[i] + 1,
1790                                   s->lambda_table[i] * (s->qscale + 1) /
1791                                   s->qscale);
1792                 }
1793                 s->mb_skipped = 0;        // done in frame_start()
1794                 // done in encode_picture() so we must undo it
1795                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1796                     if (s->flipflop_rounding          ||
1797                         s->codec_id == AV_CODEC_ID_H263P ||
1798                         s->codec_id == AV_CODEC_ID_MPEG4)
1799                         s->no_rounding ^= 1;
1800                 }
1801                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1802                     s->time_base       = s->last_time_base;
1803                     s->last_non_b_time = s->time - s->pp_time;
1804                 }
1805                 for (i = 0; i < context_count; i++) {
1806                     PutBitContext *pb = &s->thread_context[i]->pb;
1807                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1808                 }
1809                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1810                 goto vbv_retry;
1811             }
1812
1813             av_assert0(s->avctx->rc_max_rate);
1814         }
1815
1816         if (s->flags & CODEC_FLAG_PASS1)
1817             ff_write_pass1_stats(s);
1818
1819         for (i = 0; i < 4; i++) {
1820             s->current_picture_ptr->f->error[i] =
1821             s->current_picture.f->error[i] =
1822                 s->current_picture.error[i];
1823             avctx->error[i] += s->current_picture_ptr->f->error[i];
1824         }
1825
1826         if (s->flags & CODEC_FLAG_PASS1)
1827             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1828                    avctx->i_tex_bits + avctx->p_tex_bits ==
1829                        put_bits_count(&s->pb));
1830         flush_put_bits(&s->pb);
1831         s->frame_bits  = put_bits_count(&s->pb);
1832
1833         stuffing_count = ff_vbv_update(s, s->frame_bits);
1834         s->stuffing_bits = 8*stuffing_count;
1835         if (stuffing_count) {
1836             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1837                     stuffing_count + 50) {
1838                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1839                 return -1;
1840             }
1841
1842             switch (s->codec_id) {
1843             case AV_CODEC_ID_MPEG1VIDEO:
1844             case AV_CODEC_ID_MPEG2VIDEO:
1845                 while (stuffing_count--) {
1846                     put_bits(&s->pb, 8, 0);
1847                 }
1848             break;
1849             case AV_CODEC_ID_MPEG4:
1850                 put_bits(&s->pb, 16, 0);
1851                 put_bits(&s->pb, 16, 0x1C3);
1852                 stuffing_count -= 4;
1853                 while (stuffing_count--) {
1854                     put_bits(&s->pb, 8, 0xFF);
1855                 }
1856             break;
1857             default:
1858                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1859             }
1860             flush_put_bits(&s->pb);
1861             s->frame_bits  = put_bits_count(&s->pb);
1862         }
1863
1864         /* update mpeg1/2 vbv_delay for CBR */
1865         if (s->avctx->rc_max_rate                          &&
1866             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1867             s->out_format == FMT_MPEG1                     &&
1868             90000LL * (avctx->rc_buffer_size - 1) <=
1869                 s->avctx->rc_max_rate * 0xFFFFLL) {
1870             int vbv_delay, min_delay;
1871             double inbits  = s->avctx->rc_max_rate *
1872                              av_q2d(s->avctx->time_base);
1873             int    minbits = s->frame_bits - 8 *
1874                              (s->vbv_delay_ptr - s->pb.buf - 1);
1875             double bits    = s->rc_context.buffer_index + minbits - inbits;
1876
1877             if (bits < 0)
1878                 av_log(s->avctx, AV_LOG_ERROR,
1879                        "Internal error, negative bits\n");
1880
1881             assert(s->repeat_first_field == 0);
1882
1883             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1884             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1885                         s->avctx->rc_max_rate;
1886
1887             vbv_delay = FFMAX(vbv_delay, min_delay);
1888
1889             av_assert0(vbv_delay < 0xFFFF);
1890
1891             s->vbv_delay_ptr[0] &= 0xF8;
1892             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1893             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1894             s->vbv_delay_ptr[2] &= 0x07;
1895             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1896             avctx->vbv_delay     = vbv_delay * 300;
1897         }
1898         s->total_bits     += s->frame_bits;
1899         avctx->frame_bits  = s->frame_bits;
1900
1901         pkt->pts = s->current_picture.f->pts;
1902         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1903             if (!s->current_picture.f->coded_picture_number)
1904                 pkt->dts = pkt->pts - s->dts_delta;
1905             else
1906                 pkt->dts = s->reordered_pts;
1907             s->reordered_pts = pkt->pts;
1908         } else
1909             pkt->dts = pkt->pts;
1910         if (s->current_picture.f->key_frame)
1911             pkt->flags |= AV_PKT_FLAG_KEY;
1912         if (s->mb_info)
1913             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1914     } else {
1915         s->frame_bits = 0;
1916     }
1917
1918     /* release non-reference frames */
1919     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1920         if (!s->picture[i].reference)
1921             ff_mpeg_unref_picture(s, &s->picture[i]);
1922     }
1923
1924     av_assert1((s->frame_bits & 7) == 0);
1925
1926     pkt->size = s->frame_bits / 8;
1927     *got_packet = !!pkt->size;
1928     return 0;
1929 }
1930
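/**
 * Zero out a block whose remaining coefficients are all +-1 when a
 * run-length based score stays below the given threshold; a negative
 * threshold additionally allows the DC coefficient to be eliminated.
 */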
1931 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1932                                                 int n, int threshold)
1933 {
1934     static const char tab[64] = {
1935         3, 2, 2, 1, 1, 1, 1, 1,
1936         1, 1, 1, 1, 1, 1, 1, 1,
1937         1, 1, 1, 1, 1, 1, 1, 1,
1938         0, 0, 0, 0, 0, 0, 0, 0,
1939         0, 0, 0, 0, 0, 0, 0, 0,
1940         0, 0, 0, 0, 0, 0, 0, 0,
1941         0, 0, 0, 0, 0, 0, 0, 0,
1942         0, 0, 0, 0, 0, 0, 0, 0
1943     };
1944     int score = 0;
1945     int run = 0;
1946     int i;
1947     int16_t *block = s->block[n];
1948     const int last_index = s->block_last_index[n];
1949     int skip_dc;
1950
1951     if (threshold < 0) {
1952         skip_dc = 0;
1953         threshold = -threshold;
1954     } else
1955         skip_dc = 1;
1956
1957     /* Is everything we could set to zero already zero? */
1958     if (last_index <= skip_dc - 1)
1959         return;
1960
1961     for (i = 0; i <= last_index; i++) {
1962         const int j = s->intra_scantable.permutated[i];
1963         const int level = FFABS(block[j]);
1964         if (level == 1) {
1965             if (skip_dc && i == 0)
1966                 continue;
1967             score += tab[run];
1968             run = 0;
1969         } else if (level > 1) {
1970             return;
1971         } else {
1972             run++;
1973         }
1974     }
1975     if (score >= threshold)
1976         return;
1977     for (i = skip_dc; i <= last_index; i++) {
1978         const int j = s->intra_scantable.permutated[i];
1979         block[j] = 0;
1980     }
1981     if (block[0])
1982         s->block_last_index[n] = 0;
1983     else
1984         s->block_last_index[n] = -1;
1985 }
1986
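/**
 * Clip the quantized coefficients to the [min_qcoeff, max_qcoeff] range of
 * the target codec and, with simple macroblock decision, warn about how many
 * coefficients had to be clipped.
 */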
1987 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1988                                int last_index)
1989 {
1990     int i;
1991     const int maxlevel = s->max_qcoeff;
1992     const int minlevel = s->min_qcoeff;
1993     int overflow = 0;
1994
1995     if (s->mb_intra) {
1996         i = 1; // skip clipping of intra dc
1997     } else
1998         i = 0;
1999
2000     for (; i <= last_index; i++) {
2001         const int j = s->intra_scantable.permutated[i];
2002         int level = block[j];
2003
2004         if (level > maxlevel) {
2005             level = maxlevel;
2006             overflow++;
2007         } else if (level < minlevel) {
2008             level = minlevel;
2009             overflow++;
2010         }
2011
2012         block[j] = level;
2013     }
2014
2015     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2016         av_log(s->avctx, AV_LOG_INFO,
2017                "warning, clipping %d dct coefficients to %d..%d\n",
2018                overflow, minlevel, maxlevel);
2019 }
2020
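/**
 * Compute, for each position of an 8x8 block, a weight proportional to the
 * local (3x3 neighbourhood) pixel standard deviation; used by the quantizer
 * noise shaping below.
 */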
2021 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
2022 {
2023     int x, y;
2024     // FIXME optimize
2025     for (y = 0; y < 8; y++) {
2026         for (x = 0; x < 8; x++) {
2027             int x2, y2;
2028             int sum = 0;
2029             int sqr = 0;
2030             int count = 0;
2031
2032             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
2033                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
2034                     int v = ptr[x2 + y2 * stride];
2035                     sum += v;
2036                     sqr += v * v;
2037                     count++;
2038                 }
2039             }
2040             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
2041         }
2042     }
2043 }
2044
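/**
 * Encode a single macroblock: adapt the quantizer, fetch the source pixels
 * (or the motion-compensated difference for inter blocks), run the forward
 * DCT and quantization with optional noise shaping and coefficient
 * elimination, then emit the codec-specific bitstream for the block.
 */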
2045 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2046                                                 int motion_x, int motion_y,
2047                                                 int mb_block_height,
2048                                                 int mb_block_width,
2049                                                 int mb_block_count)
2050 {
2051     int16_t weight[12][64];
2052     int16_t orig[12][64];
2053     const int mb_x = s->mb_x;
2054     const int mb_y = s->mb_y;
2055     int i;
2056     int skip_dct[12];
2057     int dct_offset = s->linesize * 8; // default for progressive frames
2058     int uv_dct_offset = s->uvlinesize * 8;
2059     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2060     ptrdiff_t wrap_y, wrap_c;
2061
2062     for (i = 0; i < mb_block_count; i++)
2063         skip_dct[i] = s->skipdct;
2064
2065     if (s->adaptive_quant) {
2066         const int last_qp = s->qscale;
2067         const int mb_xy = mb_x + mb_y * s->mb_stride;
2068
2069         s->lambda = s->lambda_table[mb_xy];
2070         update_qscale(s);
2071
2072         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2073             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2074             s->dquant = s->qscale - last_qp;
2075
2076             if (s->out_format == FMT_H263) {
2077                 s->dquant = av_clip(s->dquant, -2, 2);
2078
2079                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2080                     if (!s->mb_intra) {
2081                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2082                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2083                                 s->dquant = 0;
2084                         }
2085                         if (s->mv_type == MV_TYPE_8X8)
2086                             s->dquant = 0;
2087                     }
2088                 }
2089             }
2090         }
2091         ff_set_qscale(s, last_qp + s->dquant);
2092     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2093         ff_set_qscale(s, s->qscale + s->dquant);
2094
2095     wrap_y = s->linesize;
2096     wrap_c = s->uvlinesize;
2097     ptr_y  = s->new_picture.f->data[0] +
2098              (mb_y * 16 * wrap_y)              + mb_x * 16;
2099     ptr_cb = s->new_picture.f->data[1] +
2100              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2101     ptr_cr = s->new_picture.f->data[2] +
2102              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2103
2104     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2105         uint8_t *ebuf = s->edge_emu_buffer + 36 * wrap_y;
2106         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2107         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2108         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2109                                  wrap_y, wrap_y,
2110                                  16, 16, mb_x * 16, mb_y * 16,
2111                                  s->width, s->height);
2112         ptr_y = ebuf;
2113         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2114                                  wrap_c, wrap_c,
2115                                  mb_block_width, mb_block_height,
2116                                  mb_x * mb_block_width, mb_y * mb_block_height,
2117                                  cw, ch);
2118         ptr_cb = ebuf + 16 * wrap_y;
2119         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2120                                  wrap_c, wrap_c,
2121                                  mb_block_width, mb_block_height,
2122                                  mb_x * mb_block_width, mb_y * mb_block_height,
2123                                  cw, ch);
2124         ptr_cr = ebuf + 16 * wrap_y + 16;
2125     }
2126
2127     if (s->mb_intra) {
2128         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2129             int progressive_score, interlaced_score;
2130
2131             s->interlaced_dct = 0;
2132             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2133                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2134                                                      NULL, wrap_y, 8) - 400;
2135
2136             if (progressive_score > 0) {
2137                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2138                                                         NULL, wrap_y * 2, 8) +
2139                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2140                                                         NULL, wrap_y * 2, 8);
2141                 if (progressive_score > interlaced_score) {
2142                     s->interlaced_dct = 1;
2143
2144                     dct_offset = wrap_y;
2145                     uv_dct_offset = wrap_c;
2146                     wrap_y <<= 1;
2147                     if (s->chroma_format == CHROMA_422 ||
2148                         s->chroma_format == CHROMA_444)
2149                         wrap_c <<= 1;
2150                 }
2151             }
2152         }
2153
2154         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2155         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2156         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2157         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2158
2159         if (s->flags & CODEC_FLAG_GRAY) {
2160             skip_dct[4] = 1;
2161             skip_dct[5] = 1;
2162         } else {
2163             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2164             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2165             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2166                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2167                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2168             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2169                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2170                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2171                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2172                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2173                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2174                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2175             }
2176         }
2177     } else {
2178         op_pixels_func (*op_pix)[4];
2179         qpel_mc_func (*op_qpix)[16];
2180         uint8_t *dest_y, *dest_cb, *dest_cr;
2181
2182         dest_y  = s->dest[0];
2183         dest_cb = s->dest[1];
2184         dest_cr = s->dest[2];
2185
2186         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2187             op_pix  = s->hdsp.put_pixels_tab;
2188             op_qpix = s->qdsp.put_qpel_pixels_tab;
2189         } else {
2190             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2191             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2192         }
2193
2194         if (s->mv_dir & MV_DIR_FORWARD) {
2195             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2196                           s->last_picture.f->data,
2197                           op_pix, op_qpix);
2198             op_pix  = s->hdsp.avg_pixels_tab;
2199             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2200         }
2201         if (s->mv_dir & MV_DIR_BACKWARD) {
2202             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2203                           s->next_picture.f->data,
2204                           op_pix, op_qpix);
2205         }
2206
2207         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2208             int progressive_score, interlaced_score;
2209
2210             s->interlaced_dct = 0;
2211             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2212                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2213                                                      ptr_y + wrap_y * 8,
2214                                                      wrap_y, 8) - 400;
2215
2216             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2217                 progressive_score -= 400;
2218
2219             if (progressive_score > 0) {
2220                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2221                                                         wrap_y * 2, 8) +
2222                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2223                                                         ptr_y + wrap_y,
2224                                                         wrap_y * 2, 8);
2225
2226                 if (progressive_score > interlaced_score) {
2227                     s->interlaced_dct = 1;
2228
2229                     dct_offset = wrap_y;
2230                     uv_dct_offset = wrap_c;
2231                     wrap_y <<= 1;
2232                     if (s->chroma_format == CHROMA_422)
2233                         wrap_c <<= 1;
2234                 }
2235             }
2236         }
2237
2238         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2239         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2240         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2241                             dest_y + dct_offset, wrap_y);
2242         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2243                             dest_y + dct_offset + 8, wrap_y);
2244
2245         if (s->flags & CODEC_FLAG_GRAY) {
2246             skip_dct[4] = 1;
2247             skip_dct[5] = 1;
2248         } else {
2249             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2250             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2251             if (!s->chroma_y_shift) { /* 422 */
2252                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2253                                     dest_cb + uv_dct_offset, wrap_c);
2254                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2255                                     dest_cr + uv_dct_offset, wrap_c);
2256             }
2257         }
2258         /* pre quantization */
2259         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2260                 2 * s->qscale * s->qscale) {
2261             // FIXME optimize
2262             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2263                 skip_dct[0] = 1;
2264             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2265                 skip_dct[1] = 1;
2266             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2267                                wrap_y, 8) < 20 * s->qscale)
2268                 skip_dct[2] = 1;
2269             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2270                                wrap_y, 8) < 20 * s->qscale)
2271                 skip_dct[3] = 1;
2272             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2273                 skip_dct[4] = 1;
2274             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2275                 skip_dct[5] = 1;
2276             if (!s->chroma_y_shift) { /* 422 */
2277                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2278                                    dest_cb + uv_dct_offset,
2279                                    wrap_c, 8) < 20 * s->qscale)
2280                     skip_dct[6] = 1;
2281                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2282                                    dest_cr + uv_dct_offset,
2283                                    wrap_c, 8) < 20 * s->qscale)
2284                     skip_dct[7] = 1;
2285             }
2286         }
2287     }
2288
2289     if (s->quantizer_noise_shaping) {
2290         if (!skip_dct[0])
2291             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2292         if (!skip_dct[1])
2293             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2294         if (!skip_dct[2])
2295             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2296         if (!skip_dct[3])
2297             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2298         if (!skip_dct[4])
2299             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2300         if (!skip_dct[5])
2301             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2302         if (!s->chroma_y_shift) { /* 422 */
2303             if (!skip_dct[6])
2304                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2305                                   wrap_c);
2306             if (!skip_dct[7])
2307                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2308                                   wrap_c);
2309         }
2310         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2311     }
2312
2313     /* DCT & quantize */
2314     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2315     {
2316         for (i = 0; i < mb_block_count; i++) {
2317             if (!skip_dct[i]) {
2318                 int overflow;
2319                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2320                 // FIXME we could decide to change the quantizer instead of
2321                 // clipping
2322                 // JS: I don't think that would be a good idea, it could lower
2323                 //     quality instead of improving it. Only INTRADC clipping
2324                 //     deserves changes in the quantizer.
2325                 if (overflow)
2326                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2327             } else
2328                 s->block_last_index[i] = -1;
2329         }
2330         if (s->quantizer_noise_shaping) {
2331             for (i = 0; i < mb_block_count; i++) {
2332                 if (!skip_dct[i]) {
2333                     s->block_last_index[i] =
2334                         dct_quantize_refine(s, s->block[i], weight[i],
2335                                             orig[i], i, s->qscale);
2336                 }
2337             }
2338         }
2339
2340         if (s->luma_elim_threshold && !s->mb_intra)
2341             for (i = 0; i < 4; i++)
2342                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2343         if (s->chroma_elim_threshold && !s->mb_intra)
2344             for (i = 4; i < mb_block_count; i++)
2345                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2346
2347         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2348             for (i = 0; i < mb_block_count; i++) {
2349                 if (s->block_last_index[i] == -1)
2350                     s->coded_score[i] = INT_MAX / 256;
2351             }
2352         }
2353     }
2354
2355     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2356         s->block_last_index[4] =
2357         s->block_last_index[5] = 0;
2358         s->block[4][0] =
2359         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2360         if (!s->chroma_y_shift) { /* 422 / 444 */
2361             for (i=6; i<12; i++) {
2362                 s->block_last_index[i] = 0;
2363                 s->block[i][0] = s->block[4][0];
2364             }
2365         }
2366     }
2367
2368     // FIXME: the non-C quantize code returns an incorrect block_last_index
2369     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2370         for (i = 0; i < mb_block_count; i++) {
2371             int j;
2372             if (s->block_last_index[i] > 0) {
2373                 for (j = 63; j > 0; j--) {
2374                     if (s->block[i][s->intra_scantable.permutated[j]])
2375                         break;
2376                 }
2377                 s->block_last_index[i] = j;
2378             }
2379         }
2380     }
2381
2382     /* huffman encode */
2383     switch(s->codec_id){ //FIXME a function pointer could be slightly faster
2384     case AV_CODEC_ID_MPEG1VIDEO:
2385     case AV_CODEC_ID_MPEG2VIDEO:
2386         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2387             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2388         break;
2389     case AV_CODEC_ID_MPEG4:
2390         if (CONFIG_MPEG4_ENCODER)
2391             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2392         break;
2393     case AV_CODEC_ID_MSMPEG4V2:
2394     case AV_CODEC_ID_MSMPEG4V3:
2395     case AV_CODEC_ID_WMV1:
2396         if (CONFIG_MSMPEG4_ENCODER)
2397             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2398         break;
2399     case AV_CODEC_ID_WMV2:
2400         if (CONFIG_WMV2_ENCODER)
2401             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2402         break;
2403     case AV_CODEC_ID_H261:
2404         if (CONFIG_H261_ENCODER)
2405             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2406         break;
2407     case AV_CODEC_ID_H263:
2408     case AV_CODEC_ID_H263P:
2409     case AV_CODEC_ID_FLV1:
2410     case AV_CODEC_ID_RV10:
2411     case AV_CODEC_ID_RV20:
2412         if (CONFIG_H263_ENCODER)
2413             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2414         break;
2415     case AV_CODEC_ID_MJPEG:
2416     case AV_CODEC_ID_AMV:
2417         if (CONFIG_MJPEG_ENCODER)
2418             ff_mjpeg_encode_mb(s, s->block);
2419         break;
2420     default:
2421         av_assert1(0);
2422     }
2423 }
2424
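/** Encode one macroblock with the block count/layout matching the chroma format. */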
2425 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2426 {
2427     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2428     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2429     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2430 }
2431
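/**
 * Copy the encoder state a macroblock encode depends on (MV/DC predictors,
 * bit counters, quantizer) from s to d; used to reset the working context
 * before trying a candidate mode in encode_mb_hq().
 */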
2432 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2433     int i;
2434
2435     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2436
2437     /* mpeg1 */
2438     d->mb_skip_run= s->mb_skip_run;
2439     for(i=0; i<3; i++)
2440         d->last_dc[i] = s->last_dc[i];
2441
2442     /* statistics */
2443     d->mv_bits= s->mv_bits;
2444     d->i_tex_bits= s->i_tex_bits;
2445     d->p_tex_bits= s->p_tex_bits;
2446     d->i_count= s->i_count;
2447     d->f_count= s->f_count;
2448     d->b_count= s->b_count;
2449     d->skip_count= s->skip_count;
2450     d->misc_bits= s->misc_bits;
2451     d->last_bits= 0;
2452
2453     d->mb_skipped= 0;
2454     d->qscale= s->qscale;
2455     d->dquant= s->dquant;
2456
2457     d->esc3_level_length= s->esc3_level_length;
2458 }
2459
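/**
 * Copy the state produced by a macroblock encode (predictors, bit counters,
 * PutBitContexts, block data) from s to d; the counterpart of
 * copy_context_before_encode(), used to keep the best candidate.
 */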
2460 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2461     int i;
2462
2463     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2464     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2465
2466     /* mpeg1 */
2467     d->mb_skip_run= s->mb_skip_run;
2468     for(i=0; i<3; i++)
2469         d->last_dc[i] = s->last_dc[i];
2470
2471     /* statistics */
2472     d->mv_bits= s->mv_bits;
2473     d->i_tex_bits= s->i_tex_bits;
2474     d->p_tex_bits= s->p_tex_bits;
2475     d->i_count= s->i_count;
2476     d->f_count= s->f_count;
2477     d->b_count= s->b_count;
2478     d->skip_count= s->skip_count;
2479     d->misc_bits= s->misc_bits;
2480
2481     d->mb_intra= s->mb_intra;
2482     d->mb_skipped= s->mb_skipped;
2483     d->mv_type= s->mv_type;
2484     d->mv_dir= s->mv_dir;
2485     d->pb= s->pb;
2486     if(s->data_partitioning){
2487         d->pb2= s->pb2;
2488         d->tex_pb= s->tex_pb;
2489     }
2490     d->block= s->block;
2491     for(i=0; i<8; i++)
2492         d->block_last_index[i]= s->block_last_index[i];
2493     d->interlaced_dct= s->interlaced_dct;
2494     d->qscale= s->qscale;
2495
2496     d->esc3_level_length= s->esc3_level_length;
2497 }
2498
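/**
 * Trial-encode the current macroblock with the given mode and motion vector
 * into one of two scratch bitstreams; if its bit count (or rate-distortion
 * score when mb_decision is FF_MB_DECISION_RD) is below *dmin, keep it as
 * the new best candidate.
 */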
2499 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2500                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2501                            int *dmin, int *next_block, int motion_x, int motion_y)
2502 {
2503     int score;
2504     uint8_t *dest_backup[3];
2505
2506     copy_context_before_encode(s, backup, type);
2507
2508     s->block= s->blocks[*next_block];
2509     s->pb= pb[*next_block];
2510     if(s->data_partitioning){
2511         s->pb2   = pb2   [*next_block];
2512         s->tex_pb= tex_pb[*next_block];
2513     }
2514
2515     if(*next_block){
2516         memcpy(dest_backup, s->dest, sizeof(s->dest));
2517         s->dest[0] = s->rd_scratchpad;
2518         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2519         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2520         av_assert0(s->linesize >= 32); //FIXME
2521     }
2522
2523     encode_mb(s, motion_x, motion_y);
2524
2525     score= put_bits_count(&s->pb);
2526     if(s->data_partitioning){
2527         score+= put_bits_count(&s->pb2);
2528         score+= put_bits_count(&s->tex_pb);
2529     }
2530
2531     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2532         ff_mpv_decode_mb(s, s->block);
2533
2534         score *= s->lambda2;
2535         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2536     }
2537
2538     if(*next_block){
2539         memcpy(s->dest, dest_backup, sizeof(s->dest));
2540     }
2541
2542     if(score<*dmin){
2543         *dmin= score;
2544         *next_block^=1;
2545
2546         copy_context_after_encode(best, s, type);
2547     }
2548 }
2549
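/** Sum of squared errors between two w x h blocks, with DSP fast paths for 16x16 and 8x8. */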
2550 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2551     uint32_t *sq = ff_square_tab + 256;
2552     int acc=0;
2553     int x,y;
2554
2555     if(w==16 && h==16)
2556         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2557     else if(w==8 && h==8)
2558         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2559
2560     for(y=0; y<h; y++){
2561         for(x=0; x<w; x++){
2562             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2563         }
2564     }
2565
2566     av_assert2(acc>=0);
2567
2568     return acc;
2569 }
2570
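/**
 * SSE (or NSSE, depending on mb_cmp) between the source macroblock and its
 * reconstruction, handling partial macroblocks at the right/bottom border.
 */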
2571 static int sse_mb(MpegEncContext *s){
2572     int w= 16;
2573     int h= 16;
2574
2575     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2576     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2577
2578     if(w==16 && h==16)
2579       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2580         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2581                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2582                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2583       }else{
2584         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2585                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2586                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2587       }
2588     else
2589         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2590                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2591                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2592 }
2593
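/** Per-slice worker for the motion estimation pre-pass (reverse macroblock order, pre_dia_size). */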
2594 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2595     MpegEncContext *s= *(void**)arg;
2596
2597
2598     s->me.pre_pass=1;
2599     s->me.dia_size= s->avctx->pre_dia_size;
2600     s->first_slice_line=1;
2601     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2602         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2603             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2604         }
2605         s->first_slice_line=0;
2606     }
2607
2608     s->me.pre_pass=0;
2609
2610     return 0;
2611 }
2612
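/** Per-slice worker running P- or B-frame motion estimation for every macroblock. */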
2613 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2614     MpegEncContext *s= *(void**)arg;
2615
2616     ff_check_alignment();
2617
2618     s->me.dia_size= s->avctx->dia_size;
2619     s->first_slice_line=1;
2620     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2621         s->mb_x=0; //for block init below
2622         ff_init_block_index(s);
2623         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2624             s->block_index[0]+=2;
2625             s->block_index[1]+=2;
2626             s->block_index[2]+=2;
2627             s->block_index[3]+=2;
2628
2629             /* compute motion vector & mb_type and store in context */
2630             if(s->pict_type==AV_PICTURE_TYPE_B)
2631                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2632             else
2633                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2634         }
2635         s->first_slice_line=0;
2636     }
2637     return 0;
2638 }
2639
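/** Per-slice worker computing the luma variance and mean of every macroblock. */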
2640 static int mb_var_thread(AVCodecContext *c, void *arg){
2641     MpegEncContext *s= *(void**)arg;
2642     int mb_x, mb_y;
2643
2644     ff_check_alignment();
2645
2646     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2647         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2648             int xx = mb_x * 16;
2649             int yy = mb_y * 16;
2650             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2651             int varc;
2652             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2653
2654             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2655                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2656
2657             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2658             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2659             s->me.mb_var_sum_temp    += varc;
2660         }
2661     }
2662     return 0;
2663 }
2664
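/**
 * Terminate the current slice: merge MPEG-4 partitions and write the
 * codec-specific stuffing, then byte-align and flush the bitstream.
 */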
2665 static void write_slice_end(MpegEncContext *s){
2666     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2667         if(s->partitioned_frame){
2668             ff_mpeg4_merge_partitions(s);
2669         }
2670
2671         ff_mpeg4_stuffing(&s->pb);
2672     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2673         ff_mjpeg_encode_stuffing(s);
2674     }
2675
2676     avpriv_align_put_bits(&s->pb);
2677     flush_put_bits(&s->pb);
2678
2679     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2680         s->misc_bits+= get_bits_diff(s);
2681 }
2682
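/**
 * Fill the most recently reserved 12-byte AV_PKT_DATA_H263_MB_INFO entry
 * with the bitstream offset, quantizer, GOB number, macroblock address and
 * motion vector predictor of the current macroblock.
 */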
2683 static void write_mb_info(MpegEncContext *s)
2684 {
2685     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2686     int offset = put_bits_count(&s->pb);
2687     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2688     int gobn = s->mb_y / s->gob_index;
2689     int pred_x, pred_y;
2690     if (CONFIG_H263_ENCODER)
2691         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2692     bytestream_put_le32(&ptr, offset);
2693     bytestream_put_byte(&ptr, s->qscale);
2694     bytestream_put_byte(&ptr, gobn);
2695     bytestream_put_le16(&ptr, mba);
2696     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2697     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2698     /* 4MV not implemented */
2699     bytestream_put_byte(&ptr, 0); /* hmv2 */
2700     bytestream_put_byte(&ptr, 0); /* vmv2 */
2701 }
2702
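/**
 * Maintain the H.263 macroblock-info side data: reserve a new 12-byte entry
 * once s->mb_info bytes of bitstream have been written since the previous
 * one, and record the current macroblock in the latest entry.
 */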
2703 static void update_mb_info(MpegEncContext *s, int startcode)
2704 {
2705     if (!s->mb_info)
2706         return;
2707     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2708         s->mb_info_size += 12;
2709         s->prev_mb_info = s->last_mb_info;
2710     }
2711     if (startcode) {
2712         s->prev_mb_info = put_bits_count(&s->pb)/8;
2713         /* This might have incremented mb_info_size above, and we return without
2714          * actually writing any info into that slot yet. But in that case,
2715          * this will be called again after the start code has been written, and
2716          * the mb info will be written then. */
2717         return;
2718     }
2719
2720     s->last_mb_info = put_bits_count(&s->pb)/8;
2721     if (!s->mb_info_size)
2722         s->mb_info_size += 12;
2723     write_mb_info(s);
2724 }
2725
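/**
 * Per-slice worker that encodes all macroblocks of its slice, writing GOB /
 * video packet headers and growing the output buffer when it runs short.
 */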
2726 static int encode_thread(AVCodecContext *c, void *arg){
2727     MpegEncContext *s= *(void**)arg;
2728     int mb_x, mb_y, pdif = 0;
2729     int chr_h= 16>>s->chroma_y_shift;
2730     int i, j;
2731     MpegEncContext best_s = { 0 }, backup_s;
2732     uint8_t bit_buf[2][MAX_MB_BYTES];
2733     uint8_t bit_buf2[2][MAX_MB_BYTES];
2734     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2735     PutBitContext pb[2], pb2[2], tex_pb[2];
2736
2737     ff_check_alignment();
2738
2739     for(i=0; i<2; i++){
2740         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2741         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2742         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2743     }
2744
2745     s->last_bits= put_bits_count(&s->pb);
2746     s->mv_bits=0;
2747     s->misc_bits=0;
2748     s->i_tex_bits=0;
2749     s->p_tex_bits=0;
2750     s->i_count=0;
2751     s->f_count=0;
2752     s->b_count=0;
2753     s->skip_count=0;
2754
2755     for(i=0; i<3; i++){
2756         /* init last dc values */
2757         /* note: quant matrix value (8) is implied here */
2758         s->last_dc[i] = 128 << s->intra_dc_precision;
2759
2760         s->current_picture.error[i] = 0;
2761     }
2762     if(s->codec_id==AV_CODEC_ID_AMV){
2763         s->last_dc[0] = 128*8/13;
2764         s->last_dc[1] = 128*8/14;
2765         s->last_dc[2] = 128*8/14;
2766     }
2767     s->mb_skip_run = 0;
2768     memset(s->last_mv, 0, sizeof(s->last_mv));
2769
2770     s->last_mv_dir = 0;
2771
2772     switch(s->codec_id){
2773     case AV_CODEC_ID_H263:
2774     case AV_CODEC_ID_H263P:
2775     case AV_CODEC_ID_FLV1:
2776         if (CONFIG_H263_ENCODER)
2777             s->gob_index = ff_h263_get_gob_height(s);
2778         break;
2779     case AV_CODEC_ID_MPEG4:
2780         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2781             ff_mpeg4_init_partitions(s);
2782         break;
2783     }
2784
2785     s->resync_mb_x=0;
2786     s->resync_mb_y=0;
2787     s->first_slice_line = 1;
2788     s->ptr_lastgob = s->pb.buf;
2789     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2790         s->mb_x=0;
2791         s->mb_y= mb_y;
2792
2793         ff_set_qscale(s, s->qscale);
2794         ff_init_block_index(s);
2795
2796         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2797             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2798             int mb_type= s->mb_type[xy];
2799 //            int d;
2800             int dmin= INT_MAX;
2801             int dir;
2802
2803             if (   s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES
2804                 && s->slice_context_count == 1
2805                 && s->pb.buf == s->avctx->internal->byte_buffer) {
2806                 int new_size =  s->avctx->internal->byte_buffer_size
2807                               + s->avctx->internal->byte_buffer_size/4
2808                               + s->mb_width*MAX_MB_BYTES;
2809                 int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2810                 int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2811
2812                 uint8_t *new_buffer = NULL;
2813                 int new_buffer_size = 0;
2814
2815                 av_fast_padded_malloc(&new_buffer, &new_buffer_size, new_size);
2816                 if (new_buffer) {
2817                     memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2818                     av_free(s->avctx->internal->byte_buffer);
2819                     s->avctx->internal->byte_buffer      = new_buffer;
2820                     s->avctx->internal->byte_buffer_size = new_buffer_size;
2821                     rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2822                     s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2823                     s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2824                 }
2825             }
2826             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2827                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2828                 return -1;
2829             }
2830             if(s->data_partitioning){
2831                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2832                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2833                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2834                     return -1;
2835                 }
2836             }
2837
2838             s->mb_x = mb_x;
2839             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2840             ff_update_block_index(s);
2841
2842             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2843                 ff_h261_reorder_mb_index(s);
2844                 xy= s->mb_y*s->mb_stride + s->mb_x;
2845                 mb_type= s->mb_type[xy];
2846             }
2847
2848             /* write gob / video packet header  */
2849             if(s->rtp_mode){
2850                 int current_packet_size, is_gob_start;
2851
2852                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2853
2854                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2855
2856                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2857
2858                 switch(s->codec_id){
2859                 case AV_CODEC_ID_H263:
2860                 case AV_CODEC_ID_H263P:
2861                     if(!s->h263_slice_structured)
2862                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2863                     break;
2864                 case AV_CODEC_ID_MPEG2VIDEO:
2865                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2866                 case AV_CODEC_ID_MPEG1VIDEO:
2867                     if(s->mb_skip_run) is_gob_start=0;
2868                     break;
2869                 case AV_CODEC_ID_MJPEG:
2870                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2871                     break;
2872                 }
2873
2874                 if(is_gob_start){
2875                     if(s->start_mb_y != mb_y || mb_x!=0){
2876                         write_slice_end(s);
2877
2878                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2879                             ff_mpeg4_init_partitions(s);
2880                         }
2881                     }
2882
2883                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2884                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2885
2886                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2887                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2888                         int d = 100 / s->error_rate;
2889                         if(r % d == 0){
2890                             current_packet_size=0;
2891                             s->pb.buf_ptr= s->ptr_lastgob;
2892                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2893                         }
2894                     }
2895
2896                     if (s->avctx->rtp_callback){
2897                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2898                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2899                     }
2900                     update_mb_info(s, 1);
2901
2902                     switch(s->codec_id){
2903                     case AV_CODEC_ID_MPEG4:
2904                         if (CONFIG_MPEG4_ENCODER) {
2905                             ff_mpeg4_encode_video_packet_header(s);
2906                             ff_mpeg4_clean_buffers(s);
2907                         }
2908                     break;
2909                     case AV_CODEC_ID_MPEG1VIDEO:
2910                     case AV_CODEC_ID_MPEG2VIDEO:
2911                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2912                             ff_mpeg1_encode_slice_header(s);
2913                             ff_mpeg1_clean_buffers(s);
2914                         }
2915                     break;
2916                     case AV_CODEC_ID_H263:
2917                     case AV_CODEC_ID_H263P:
2918                         if (CONFIG_H263_ENCODER)
2919                             ff_h263_encode_gob_header(s, mb_y);
2920                     break;
2921                     }
2922
2923                     if(s->flags&CODEC_FLAG_PASS1){
2924                         int bits= put_bits_count(&s->pb);
2925                         s->misc_bits+= bits - s->last_bits;
2926                         s->last_bits= bits;
2927                     }
2928
2929                     s->ptr_lastgob += current_packet_size;
2930                     s->first_slice_line=1;
2931                     s->resync_mb_x=mb_x;
2932                     s->resync_mb_y=mb_y;
2933                 }
2934             }
2935
2936             if(  (s->resync_mb_x   == s->mb_x)
2937                && s->resync_mb_y+1 == s->mb_y){
2938                 s->first_slice_line=0;
2939             }
2940
2941             s->mb_skipped=0;
2942             s->dquant=0; //only for QP_RD
2943
2944             update_mb_info(s, 0);
2945
2946             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2947                 int next_block=0;
2948                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2949
2950                 copy_context_before_encode(&backup_s, s, -1);
2951                 backup_s.pb= s->pb;
2952                 best_s.data_partitioning= s->data_partitioning;
2953                 best_s.partitioned_frame= s->partitioned_frame;
2954                 if(s->data_partitioning){
2955                     backup_s.pb2= s->pb2;
2956                     backup_s.tex_pb= s->tex_pb;
2957                 }
2958
2959                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2960                     s->mv_dir = MV_DIR_FORWARD;
2961                     s->mv_type = MV_TYPE_16X16;
2962                     s->mb_intra= 0;
2963                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2964                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2965                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2966                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2967                 }
2968                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2969                     s->mv_dir = MV_DIR_FORWARD;
2970                     s->mv_type = MV_TYPE_FIELD;
2971                     s->mb_intra= 0;
2972                     for(i=0; i<2; i++){
2973                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2974                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2975                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2976                     }
2977                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2978                                  &dmin, &next_block, 0, 0);
2979                 }
2980                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2981                     s->mv_dir = MV_DIR_FORWARD;
2982                     s->mv_type = MV_TYPE_16X16;
2983                     s->mb_intra= 0;
2984                     s->mv[0][0][0] = 0;
2985                     s->mv[0][0][1] = 0;
2986                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2987                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2988                 }
2989                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2990                     s->mv_dir = MV_DIR_FORWARD;
2991                     s->mv_type = MV_TYPE_8X8;
2992                     s->mb_intra= 0;
2993                     for(i=0; i<4; i++){
2994                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2995                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2996                     }
2997                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2998                                  &dmin, &next_block, 0, 0);
2999                 }
3000                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3001                     s->mv_dir = MV_DIR_FORWARD;
3002                     s->mv_type = MV_TYPE_16X16;
3003                     s->mb_intra= 0;
3004                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3005                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3006                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3007                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3008                 }
3009                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3010                     s->mv_dir = MV_DIR_BACKWARD;
3011                     s->mv_type = MV_TYPE_16X16;
3012                     s->mb_intra= 0;
3013                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3014                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3015                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3016                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3017                 }
3018                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3019                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3020                     s->mv_type = MV_TYPE_16X16;
3021                     s->mb_intra= 0;
3022                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3023                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3024                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3025                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3026                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3027                                  &dmin, &next_block, 0, 0);
3028                 }
3029                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3030                     s->mv_dir = MV_DIR_FORWARD;
3031                     s->mv_type = MV_TYPE_FIELD;
3032                     s->mb_intra= 0;
3033                     for(i=0; i<2; i++){
3034                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3035                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3036                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3037                     }
3038                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3039                                  &dmin, &next_block, 0, 0);
3040                 }
3041                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3042                     s->mv_dir = MV_DIR_BACKWARD;
3043                     s->mv_type = MV_TYPE_FIELD;
3044                     s->mb_intra= 0;
3045                     for(i=0; i<2; i++){
3046                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3047                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3048                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3049                     }
3050                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3051                                  &dmin, &next_block, 0, 0);
3052                 }
3053                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3054                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3055                     s->mv_type = MV_TYPE_FIELD;
3056                     s->mb_intra= 0;
3057                     for(dir=0; dir<2; dir++){
3058                         for(i=0; i<2; i++){
3059                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3060                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3061                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3062                         }
3063                     }
3064                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3065                                  &dmin, &next_block, 0, 0);
3066                 }
3067                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3068                     s->mv_dir = 0;
3069                     s->mv_type = MV_TYPE_16X16;
3070                     s->mb_intra= 1;
3071                     s->mv[0][0][0] = 0;
3072                     s->mv[0][0][1] = 0;
3073                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3074                                  &dmin, &next_block, 0, 0);
3075                     if(s->h263_pred || s->h263_aic){
3076                         if(best_s.mb_intra)
3077                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3078                         else
3079                             ff_clean_intra_table_entries(s); //old mode?
3080                     }
3081                 }
3082
3083                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
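                    /* FF_MPV_FLAG_QP_RD: besides the best MB type, also try
                     * re-encoding the macroblock with qscale changed by
                     * -1/+1/-2/+2 (dquant_tab); B-frames only try +-2.  The
                     * quantizer with the lowest RD cost wins via encode_mb_hq(). */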
3084                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3085                         const int last_qp= backup_s.qscale;
3086                         int qpi, qp, dc[6];
3087                         int16_t ac[6][16];
3088                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3089                         static const int dquant_tab[4]={-1,1,-2,2};
3090                         int storecoefs = s->mb_intra && s->dc_val[0];
3091
3092                         av_assert2(backup_s.dquant == 0);
3093
3094                         //FIXME intra
3095                         s->mv_dir= best_s.mv_dir;
3096                         s->mv_type = MV_TYPE_16X16;
3097                         s->mb_intra= best_s.mb_intra;
3098                         s->mv[0][0][0] = best_s.mv[0][0][0];
3099                         s->mv[0][0][1] = best_s.mv[0][0][1];
3100                         s->mv[1][0][0] = best_s.mv[1][0][0];
3101                         s->mv[1][0][1] = best_s.mv[1][0][1];
3102
3103                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3104                         for(; qpi<4; qpi++){
3105                             int dquant= dquant_tab[qpi];
3106                             qp= last_qp + dquant;
3107                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3108                                 continue;
3109                             backup_s.dquant= dquant;
3110                             if(storecoefs){
3111                                 for(i=0; i<6; i++){
3112                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3113                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3114                                 }
3115                             }
3116
3117                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3118                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3119                             if(best_s.qscale != qp){
3120                                 if(storecoefs){
3121                                     for(i=0; i<6; i++){
3122                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3123                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3124                                     }
3125                                 }
3126                             }
3127                         }
3128                     }
3129                 }
3130                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3131                     int mx= s->b_direct_mv_table[xy][0];
3132                     int my= s->b_direct_mv_table[xy][1];
3133
3134                     backup_s.dquant = 0;
3135                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3136                     s->mb_intra= 0;
3137                     ff_mpeg4_set_direct_mv(s, mx, my);
3138                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3139                                  &dmin, &next_block, mx, my);
3140                 }
3141                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3142                     backup_s.dquant = 0;
3143                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3144                     s->mb_intra= 0;
3145                     ff_mpeg4_set_direct_mv(s, 0, 0);
3146                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3147                                  &dmin, &next_block, 0, 0);
3148                 }
3149                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
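                    /* FF_MPV_FLAG_SKIP_RD: if the best inter candidate still
                     * has coded coefficients, try it once more with the DCT
                     * skipped (skipdct=1), effectively encoding it without a
                     * coded residual, and keep that version if it is cheaper. */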
3150                     int coded=0;
3151                     for(i=0; i<6; i++)
3152                         coded |= s->block_last_index[i];
3153                     if(coded){
3154                         int mx,my;
3155                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3156                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3157                             mx=my=0; //FIXME find the one we actually used
3158                             ff_mpeg4_set_direct_mv(s, mx, my);
3159                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3160                             mx= s->mv[1][0][0];
3161                             my= s->mv[1][0][1];
3162                         }else{
3163                             mx= s->mv[0][0][0];
3164                             my= s->mv[0][0][1];
3165                         }
3166
3167                         s->mv_dir= best_s.mv_dir;
3168                         s->mv_type = best_s.mv_type;
3169                         s->mb_intra= 0;
3170 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3171                         s->mv[0][0][1] = best_s.mv[0][0][1];
3172                         s->mv[1][0][0] = best_s.mv[1][0][0];
3173                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3174                         backup_s.dquant= 0;
3175                         s->skipdct=1;
3176                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3177                                         &dmin, &next_block, mx, my);
3178                         s->skipdct=0;
3179                     }
3180                 }
3181
3182                 s->current_picture.qscale_table[xy] = best_s.qscale;
3183
3184                 copy_context_after_encode(s, &best_s, -1);
3185
3186                 pb_bits_count= put_bits_count(&s->pb);
3187                 flush_put_bits(&s->pb);
3188                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3189                 s->pb= backup_s.pb;
3190
3191                 if(s->data_partitioning){
3192                     pb2_bits_count= put_bits_count(&s->pb2);
3193                     flush_put_bits(&s->pb2);
3194                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3195                     s->pb2= backup_s.pb2;
3196
3197                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3198                     flush_put_bits(&s->tex_pb);
3199                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3200                     s->tex_pb= backup_s.tex_pb;
3201                 }
3202                 s->last_bits= put_bits_count(&s->pb);
3203
3204                 if (CONFIG_H263_ENCODER &&
3205                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3206                     ff_h263_update_motion_val(s);
3207
3208                 if(next_block==0){ //FIXME 16 vs linesize16
3209                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3210                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3211                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3212                 }
3213
3214                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3215                     ff_mpv_decode_mb(s, s->block);
3216             } else {
3217                 int motion_x = 0, motion_y = 0;
3218                 s->mv_type=MV_TYPE_16X16;
3219                 // only one MB-Type possible
3220
3221                 switch(mb_type){
3222                 case CANDIDATE_MB_TYPE_INTRA:
3223                     s->mv_dir = 0;
3224                     s->mb_intra= 1;
3225                     motion_x= s->mv[0][0][0] = 0;
3226                     motion_y= s->mv[0][0][1] = 0;
3227                     break;
3228                 case CANDIDATE_MB_TYPE_INTER:
3229                     s->mv_dir = MV_DIR_FORWARD;
3230                     s->mb_intra= 0;
3231                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3232                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3233                     break;
3234                 case CANDIDATE_MB_TYPE_INTER_I:
3235                     s->mv_dir = MV_DIR_FORWARD;
3236                     s->mv_type = MV_TYPE_FIELD;
3237                     s->mb_intra= 0;
3238                     for(i=0; i<2; i++){
3239                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3240                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3241                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3242                     }
3243                     break;
3244                 case CANDIDATE_MB_TYPE_INTER4V:
3245                     s->mv_dir = MV_DIR_FORWARD;
3246                     s->mv_type = MV_TYPE_8X8;
3247                     s->mb_intra= 0;
3248                     for(i=0; i<4; i++){
3249                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3250                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3251                     }
3252                     break;
3253                 case CANDIDATE_MB_TYPE_DIRECT:
3254                     if (CONFIG_MPEG4_ENCODER) {
3255                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3256                         s->mb_intra= 0;
3257                         motion_x=s->b_direct_mv_table[xy][0];
3258                         motion_y=s->b_direct_mv_table[xy][1];
3259                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3260                     }
3261                     break;
3262                 case CANDIDATE_MB_TYPE_DIRECT0:
3263                     if (CONFIG_MPEG4_ENCODER) {
3264                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3265                         s->mb_intra= 0;
3266                         ff_mpeg4_set_direct_mv(s, 0, 0);
3267                     }
3268                     break;
3269                 case CANDIDATE_MB_TYPE_BIDIR:
3270                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3271                     s->mb_intra= 0;
3272                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3273                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3274                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3275                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3276                     break;
3277                 case CANDIDATE_MB_TYPE_BACKWARD:
3278                     s->mv_dir = MV_DIR_BACKWARD;
3279                     s->mb_intra= 0;
3280                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3281                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3282                     break;
3283                 case CANDIDATE_MB_TYPE_FORWARD:
3284                     s->mv_dir = MV_DIR_FORWARD;
3285                     s->mb_intra= 0;
3286                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3287                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3288                     break;
3289                 case CANDIDATE_MB_TYPE_FORWARD_I:
3290                     s->mv_dir = MV_DIR_FORWARD;
3291                     s->mv_type = MV_TYPE_FIELD;
3292                     s->mb_intra= 0;
3293                     for(i=0; i<2; i++){
3294                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3295                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3296                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3297                     }
3298                     break;
3299                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3300                     s->mv_dir = MV_DIR_BACKWARD;
3301                     s->mv_type = MV_TYPE_FIELD;
3302                     s->mb_intra= 0;
3303                     for(i=0; i<2; i++){
3304                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3305                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3306                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3307                     }
3308                     break;
3309                 case CANDIDATE_MB_TYPE_BIDIR_I:
3310                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3311                     s->mv_type = MV_TYPE_FIELD;
3312                     s->mb_intra= 0;
3313                     for(dir=0; dir<2; dir++){
3314                         for(i=0; i<2; i++){
3315                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3316                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3317                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3318                         }
3319                     }
3320                     break;
3321                 default:
3322                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3323                 }
3324
3325                 encode_mb(s, motion_x, motion_y);
3326
3327                 // RAL: remember the MV direction of the last macroblock
3328                 s->last_mv_dir = s->mv_dir;
3329
3330                 if (CONFIG_H263_ENCODER &&
3331                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3332                     ff_h263_update_motion_val(s);
3333
3334                 ff_mpv_decode_mb(s, s->block);
3335             }
3336
3337             /* clean the MV table in I-, P- and S-frames; it is used for direct mode in B-frames */
3338             if(s->mb_intra /* && I,P,S_TYPE */){
3339                 s->p_mv_table[xy][0]=0;
3340                 s->p_mv_table[xy][1]=0;
3341             }
3342
3343             if(s->flags&CODEC_FLAG_PSNR){
3344                 int w= 16;
3345                 int h= 16;
3346
3347                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3348                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3349
3350                 s->current_picture.error[0] += sse(
3351                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3352                     s->dest[0], w, h, s->linesize);
3353                 s->current_picture.error[1] += sse(
3354                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3355                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3356                 s->current_picture.error[2] += sse(
3357                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3358                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3359             }
3360             if(s->loop_filter){
3361                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3362                     ff_h263_loop_filter(s);
3363             }
3364             av_dlog(s->avctx, "MB %d %d bits\n",
3365                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3366         }
3367     }
3368
3369     //not beautiful, but it must be written before flushing, so it has to be here
3370     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3371         ff_msmpeg4_encode_ext_header(s);
3372
3373     write_slice_end(s);
3374
3375     /* Send the last GOB if RTP */
3376     if (s->avctx->rtp_callback) {
3377         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3378         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3379         /* Call the RTP callback to send the last GOB */
3380         emms_c();
3381         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3382     }
3383
3384     return 0;
3385 }
3386
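/* MERGE() adds a statistic gathered by a slice context into the main context
 * and zeroes it in the source, so the same value is never accumulated twice. */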
3387 #define MERGE(field) dst->field += src->field; src->field=0
3388 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3389     MERGE(me.scene_change_score);
3390     MERGE(me.mc_mb_var_sum_temp);
3391     MERGE(me.mb_var_sum_temp);
3392 }
3393
3394 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3395     int i;
3396
3397     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3398     MERGE(dct_count[1]);
3399     MERGE(mv_bits);
3400     MERGE(i_tex_bits);
3401     MERGE(p_tex_bits);
3402     MERGE(i_count);
3403     MERGE(f_count);
3404     MERGE(b_count);
3405     MERGE(skip_count);
3406     MERGE(misc_bits);
3407     MERGE(er.error_count);
3408     MERGE(padding_bug_score);
3409     MERGE(current_picture.error[0]);
3410     MERGE(current_picture.error[1]);
3411     MERGE(current_picture.error[2]);
3412
3413     if(dst->avctx->noise_reduction){
3414         for(i=0; i<64; i++){
3415             MERGE(dct_error_sum[0][i]);
3416             MERGE(dct_error_sum[1][i]);
3417         }
3418     }
3419
3420     assert(put_bits_count(&src->pb) % 8 ==0);
3421     assert(put_bits_count(&dst->pb) % 8 ==0);
3422     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3423     flush_put_bits(&dst->pb);
3424 }
3425
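/* estimate_qp() decides the frame quality: a pending next_lambda takes
 * precedence, otherwise the rate controller is asked via
 * ff_rate_estimate_qscale(); with adaptive quantization the per-MB qscale
 * table is additionally smoothed for codecs that restrict how much the
 * quantizer may change between macroblocks. */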
3426 static int estimate_qp(MpegEncContext *s, int dry_run){
3427     if (s->next_lambda){
3428         s->current_picture_ptr->f->quality =
3429         s->current_picture.f->quality = s->next_lambda;
3430         if(!dry_run) s->next_lambda= 0;
3431     } else if (!s->fixed_qscale) {
3432         s->current_picture_ptr->f->quality =
3433         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3434         if (s->current_picture.f->quality < 0)
3435             return -1;
3436     }
3437
3438     if(s->adaptive_quant){
3439         switch(s->codec_id){
3440         case AV_CODEC_ID_MPEG4:
3441             if (CONFIG_MPEG4_ENCODER)
3442                 ff_clean_mpeg4_qscales(s);
3443             break;
3444         case AV_CODEC_ID_H263:
3445         case AV_CODEC_ID_H263P:
3446         case AV_CODEC_ID_FLV1:
3447             if (CONFIG_H263_ENCODER)
3448                 ff_clean_h263_qscales(s);
3449             break;
3450         default:
3451             ff_init_qscale_tab(s);
3452         }
3453
3454         s->lambda= s->lambda_table[0];
3455         //FIXME broken
3456     }else
3457         s->lambda = s->current_picture.f->quality;
3458     update_qscale(s);
3459     return 0;
3460 }
3461
3462 /* must be called before writing the header */
3463 static void set_frame_distances(MpegEncContext * s){
3464     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3465     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3466
3467     if(s->pict_type==AV_PICTURE_TYPE_B){
3468         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3469         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3470     }else{
3471         s->pp_time= s->time - s->last_non_b_time;
3472         s->last_non_b_time= s->time;
3473         assert(s->picture_number==0 || s->pp_time > 0);
3474     }
3475 }
3476
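/* encode_picture() drives the encoding of one frame: it sets up frame
 * distances and rounding, runs motion estimation in all slice contexts,
 * may promote the frame to an I-frame on a scene change, chooses f_code and
 * b_code and clips overlong motion vectors, builds the quantization matrices
 * for MJPEG/AMV, writes the picture header and finally runs encode_thread()
 * in every slice context, merging their statistics and bitstreams. */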
3477 static int encode_picture(MpegEncContext *s, int picture_number)
3478 {
3479     int i, ret;
3480     int bits;
3481     int context_count = s->slice_context_count;
3482
3483     s->picture_number = picture_number;
3484
3485     /* Reset the average MB variance */
3486     s->me.mb_var_sum_temp    =
3487     s->me.mc_mb_var_sum_temp = 0;
3488
3489     /* we need to initialize some time vars before we can encode b-frames */
3490     // RAL: Condition added for MPEG1VIDEO
3491     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3492         set_frame_distances(s);
3493     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3494         ff_set_mpeg4_time(s);
3495
3496     s->me.scene_change_score=0;
3497
3498 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3499
3500     if(s->pict_type==AV_PICTURE_TYPE_I){
3501         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3502         else                        s->no_rounding=0;
3503     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3504         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3505             s->no_rounding ^= 1;
3506     }
3507
3508     if(s->flags & CODEC_FLAG_PASS2){
3509         if (estimate_qp(s,1) < 0)
3510             return -1;
3511         ff_get_2pass_fcode(s);
3512     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3513         if(s->pict_type==AV_PICTURE_TYPE_B)
3514             s->lambda= s->last_lambda_for[s->pict_type];
3515         else
3516             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3517         update_qscale(s);
3518     }
3519
3520     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3521         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3522         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3523         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3524         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3525     }
3526
3527     s->mb_intra=0; //for the rate distortion & bit compare functions
3528     for(i=1; i<context_count; i++){
3529         ret = ff_update_duplicate_context(s->thread_context[i], s);
3530         if (ret < 0)
3531             return ret;
3532     }
3533
3534     if(ff_init_me(s)<0)
3535         return -1;
3536
3537     /* Estimate motion for every MB */
3538     if(s->pict_type != AV_PICTURE_TYPE_I){
3539         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3540         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3541         if (s->pict_type != AV_PICTURE_TYPE_B) {
3542             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3543                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3544             }
3545         }
3546
3547         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3548     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3549         /* I-Frame */
3550         for(i=0; i<s->mb_stride*s->mb_height; i++)
3551             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3552
3553         if(!s->fixed_qscale){
3554             /* finding spatial complexity for I-frame rate control */
3555             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3556         }
3557     }
3558     for(i=1; i<context_count; i++){
3559         merge_context_after_me(s, s->thread_context[i]);
3560     }
3561     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3562     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3563     emms_c();
3564
3565     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3566         s->pict_type= AV_PICTURE_TYPE_I;
3567         for(i=0; i<s->mb_stride*s->mb_height; i++)
3568             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3569         if(s->msmpeg4_version >= 3)
3570             s->no_rounding=1;
3571         av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3572                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3573     }
3574
3575     if(!s->umvplus){
3576         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3577             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3578
3579             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3580                 int a,b;
3581                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3582                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3583                 s->f_code= FFMAX3(s->f_code, a, b);
3584             }
3585
3586             ff_fix_long_p_mvs(s);
3587             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3588             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3589                 int j;
3590                 for(i=0; i<2; i++){
3591                     for(j=0; j<2; j++)
3592                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3593                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3594                 }
3595             }
3596         }
3597
3598         if(s->pict_type==AV_PICTURE_TYPE_B){
3599             int a, b;
3600
3601             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3602             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3603             s->f_code = FFMAX(a, b);
3604
3605             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3606             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3607             s->b_code = FFMAX(a, b);
3608
3609             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3610             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3611             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3612             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3613             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3614                 int dir, j;
3615                 for(dir=0; dir<2; dir++){
3616                     for(i=0; i<2; i++){
3617                         for(j=0; j<2; j++){
3618                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3619                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3620                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3621                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3622                         }
3623                     }
3624                 }
3625             }
3626         }
3627     }
3628
3629     if (estimate_qp(s, 0) < 0)
3630         return -1;
3631
3632     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3633         s->qscale= 3; //reduce clipping problems
3634
3635     if (s->out_format == FMT_MJPEG) {
3636         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3637         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3638
3639         if (s->avctx->intra_matrix) {
3640             chroma_matrix =
3641             luma_matrix = s->avctx->intra_matrix;
3642         }
3643         if (s->avctx->chroma_intra_matrix)
3644             chroma_matrix = s->avctx->chroma_intra_matrix;
3645
3646         /* for mjpeg, we do include qscale in the matrix */
3647         for(i=1;i<64;i++){
3648             int j = s->idsp.idct_permutation[i];
3649
3650             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3651             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3652         }
3653         s->y_dc_scale_table=
3654         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3655         s->chroma_intra_matrix[0] =
3656         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3657         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3658                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3659         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3660                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3661         s->qscale= 8;
3662     }
3663     if(s->codec_id == AV_CODEC_ID_AMV){
3664         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3665         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3666         for(i=1;i<64;i++){
3667             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3668
3669             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3670             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3671         }
3672         s->y_dc_scale_table= y;
3673         s->c_dc_scale_table= c;
3674         s->intra_matrix[0] = 13;
3675         s->chroma_intra_matrix[0] = 14;
3676         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3677                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3678         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3679                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3680         s->qscale= 8;
3681     }
3682
3683     //FIXME var duplication
3684     s->current_picture_ptr->f->key_frame =
3685     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3686     s->current_picture_ptr->f->pict_type =
3687     s->current_picture.f->pict_type = s->pict_type;
3688
3689     if (s->current_picture.f->key_frame)
3690         s->picture_in_gop_number=0;
3691
3692     s->mb_x = s->mb_y = 0;
3693     s->last_bits= put_bits_count(&s->pb);
3694     switch(s->out_format) {
3695     case FMT_MJPEG:
3696         if (CONFIG_MJPEG_ENCODER)
3697             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3698                                            s->intra_matrix, s->chroma_intra_matrix);
3699         break;
3700     case FMT_H261:
3701         if (CONFIG_H261_ENCODER)
3702             ff_h261_encode_picture_header(s, picture_number);
3703         break;
3704     case FMT_H263:
3705         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3706             ff_wmv2_encode_picture_header(s, picture_number);
3707         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3708             ff_msmpeg4_encode_picture_header(s, picture_number);
3709         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3710             ff_mpeg4_encode_picture_header(s, picture_number);
3711         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3712             ret = ff_rv10_encode_picture_header(s, picture_number);
3713             if (ret < 0)
3714                 return ret;
3715         }
3716         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3717             ff_rv20_encode_picture_header(s, picture_number);
3718         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3719             ff_flv_encode_picture_header(s, picture_number);
3720         else if (CONFIG_H263_ENCODER)
3721             ff_h263_encode_picture_header(s, picture_number);
3722         break;
3723     case FMT_MPEG1:
3724         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3725             ff_mpeg1_encode_picture_header(s, picture_number);
3726         break;
3727     default:
3728         av_assert0(0);
3729     }
3730     bits= put_bits_count(&s->pb);
3731     s->header_bits= bits - s->last_bits;
3732
3733     for(i=1; i<context_count; i++){
3734         update_duplicate_context_after_me(s->thread_context[i], s);
3735     }
3736     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3737     for(i=1; i<context_count; i++){
3738         merge_context_after_encode(s, s->thread_context[i]);
3739     }
3740     emms_c();
3741     return 0;
3742 }
3743
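/* Adaptive dead-zone noise reduction: the magnitude of every nonzero DCT
 * coefficient is accumulated in dct_error_sum (per coefficient and
 * intra/inter), and each coefficient is shrunk towards zero by the
 * corresponding dct_offset, without ever changing its sign. */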
3744 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3745     const int intra= s->mb_intra;
3746     int i;
3747
3748     s->dct_count[intra]++;
3749
3750     for(i=0; i<64; i++){
3751         int level= block[i];
3752
3753         if(level){
3754             if(level>0){
3755                 s->dct_error_sum[intra][i] += level;
3756                 level -= s->dct_offset[intra][i];
3757                 if(level<0) level=0;
3758             }else{
3759                 s->dct_error_sum[intra][i] -= level;
3760                 level += s->dct_offset[intra][i];
3761                 if(level>0) level=0;
3762             }
3763             block[i]= level;
3764         }
3765     }
3766 }
3767
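/* Trellis quantization: instead of rounding every DCT coefficient
 * independently, a Viterbi-style search over the scan order keeps a list of
 * surviving run/level paths and, for each coefficient, tries the nominal
 * quantized level and the level one below it, choosing the combination that
 * minimizes
 *     cost = (unquantized - requantized)^2 + lambda * vlc_bits(run, level)
 * with vlc_bits taken from the codec's AC VLC length tables. */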
3768 static int dct_quantize_trellis_c(MpegEncContext *s,
3769                                   int16_t *block, int n,
3770                                   int qscale, int *overflow){
3771     const int *qmat;
3772     const uint16_t *matrix;
3773     const uint8_t *scantable= s->intra_scantable.scantable;
3774     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3775     int max=0;
3776     unsigned int threshold1, threshold2;
3777     int bias=0;
3778     int run_tab[65];
3779     int level_tab[65];
3780     int score_tab[65];
3781     int survivor[65];
3782     int survivor_count;
3783     int last_run=0;
3784     int last_level=0;
3785     int last_score= 0;
3786     int last_i;
3787     int coeff[2][64];
3788     int coeff_count[64];
3789     int qmul, qadd, start_i, last_non_zero, i, dc;
3790     const int esc_length= s->ac_esc_length;
3791     uint8_t * length;
3792     uint8_t * last_length;
3793     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3794
3795     s->fdsp.fdct(block);
3796
3797     if(s->dct_error_sum)
3798         s->denoise_dct(s, block);
3799     qmul= qscale*16;
3800     qadd= ((qscale-1)|1)*8;
3801
3802     if (s->mb_intra) {
3803         int q;
3804         if (!s->h263_aic) {
3805             if (n < 4)
3806                 q = s->y_dc_scale;
3807             else
3808                 q = s->c_dc_scale;
3809             q = q << 3;
3810         } else{
3811             /* For AIC we skip quant/dequant of INTRADC */
3812             q = 1 << 3;
3813             qadd=0;
3814         }
3815
3816         /* note: block[0] is assumed to be positive */
3817         block[0] = (block[0] + (q >> 1)) / q;
3818         start_i = 1;
3819         last_non_zero = 0;
3820         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3821         matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3822         if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3823             bias= 1<<(QMAT_SHIFT-1);
3824
3825         if (n > 3 && s->intra_chroma_ac_vlc_length) {
3826             length     = s->intra_chroma_ac_vlc_length;
3827             last_length= s->intra_chroma_ac_vlc_last_length;
3828         } else {
3829             length     = s->intra_ac_vlc_length;
3830             last_length= s->intra_ac_vlc_last_length;
3831         }
3832     } else {
3833         start_i = 0;
3834         last_non_zero = -1;
3835         qmat = s->q_inter_matrix[qscale];
3836         matrix = s->inter_matrix;
3837         length     = s->inter_ac_vlc_length;
3838         last_length= s->inter_ac_vlc_last_length;
3839     }
3840     last_i= start_i;
3841
3842     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3843     threshold2= (threshold1<<1);
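    /* ((unsigned)(level + threshold1)) > threshold2 is a branch-free test for
     * |level| being large enough that the biased quantized magnitude reaches
     * at least 1, i.e. that the coefficient survives quantization at all. */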
3844
3845     for(i=63; i>=start_i; i--) {
3846         const int j = scantable[i];
3847         int level = block[j] * qmat[j];
3848
3849         if(((unsigned)(level+threshold1))>threshold2){
3850             last_non_zero = i;
3851             break;
3852         }
3853     }
3854
3855     for(i=start_i; i<=last_non_zero; i++) {
3856         const int j = scantable[i];
3857         int level = block[j] * qmat[j];
3858
3859 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3860 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3861         if(((unsigned)(level+threshold1))>threshold2){
3862             if(level>0){
3863                 level= (bias + level)>>QMAT_SHIFT;
3864                 coeff[0][i]= level;
3865                 coeff[1][i]= level-1;
3866 //                coeff[2][k]= level-2;
3867             }else{
3868                 level= (bias - level)>>QMAT_SHIFT;
3869                 coeff[0][i]= -level;
3870                 coeff[1][i]= -level+1;
3871 //                coeff[2][k]= -level+2;
3872             }
3873             coeff_count[i]= FFMIN(level, 2);
3874             av_assert2(coeff_count[i]);
3875             max |=level;
3876         }else{
3877             coeff[0][i]= (level>>31)|1;
3878             coeff_count[i]= 1;
3879         }
3880     }
3881
3882     *overflow= s->max_qcoeff < max; //overflow might have happened
3883
3884     if(last_non_zero < start_i){
3885         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3886         return last_non_zero;
3887     }
3888
3889     score_tab[start_i]= 0;
3890     survivor[0]= start_i;
3891     survivor_count= 1;
3892
3893     for(i=start_i; i<=last_non_zero; i++){
3894         int level_index, j, zero_distortion;
3895         int dct_coeff= FFABS(block[ scantable[i] ]);
3896         int best_score=256*256*256*120;
3897
3898         if (s->fdsp.fdct == ff_fdct_ifast)
3899             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3900         zero_distortion= dct_coeff*dct_coeff;
3901
3902         for(level_index=0; level_index < coeff_count[i]; level_index++){
3903             int distortion;
3904             int level= coeff[level_index][i];
3905             const int alevel= FFABS(level);
3906             int unquant_coeff;
3907
3908             av_assert2(level);
3909
3910             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3911                 unquant_coeff= alevel*qmul + qadd;
3912             } else if(s->out_format == FMT_MJPEG) {
3913                 j = s->idsp.idct_permutation[scantable[i]];
3914                 unquant_coeff = alevel * matrix[j] * 8;
3915             }else{ //MPEG1
3916                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3917                 if(s->mb_intra){
3918                         unquant_coeff = (int)(  alevel  * qscale * matrix[j]) >> 3;
3919                         unquant_coeff =   (unquant_coeff - 1) | 1;
3920                 }else{
3921                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[j])) >> 4;
3922                         unquant_coeff =   (unquant_coeff - 1) | 1;
3923                 }
3924                 unquant_coeff<<= 3;
3925             }
3926
3927             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3928             level+=64;
3929             if((level&(~127)) == 0){
3930                 for(j=survivor_count-1; j>=0; j--){
3931                     int run= i - survivor[j];
3932                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3933                     score += score_tab[i-run];
3934
3935                     if(score < best_score){
3936                         best_score= score;
3937                         run_tab[i+1]= run;
3938                         level_tab[i+1]= level-64;
3939                     }
3940                 }
3941
3942                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3943                     for(j=survivor_count-1; j>=0; j--){
3944                         int run= i - survivor[j];
3945                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3946                         score += score_tab[i-run];
3947                         if(score < last_score){
3948                             last_score= score;
3949                             last_run= run;
3950                             last_level= level-64;
3951                             last_i= i+1;
3952                         }
3953                     }
3954                 }
3955             }else{
3956                 distortion += esc_length*lambda;
3957                 for(j=survivor_count-1; j>=0; j--){
3958                     int run= i - survivor[j];
3959                     int score= distortion + score_tab[i-run];
3960
3961                     if(score < best_score){
3962                         best_score= score;
3963                         run_tab[i+1]= run;
3964                         level_tab[i+1]= level-64;
3965                     }
3966                 }
3967
3968                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3969                   for(j=survivor_count-1; j>=0; j--){
3970                         int run= i - survivor[j];
3971                         int score= distortion + score_tab[i-run];
3972                         if(score < last_score){
3973                             last_score= score;
3974                             last_run= run;
3975                             last_level= level-64;
3976                             last_i= i+1;
3977                         }
3978                     }
3979                 }
3980             }
3981         }
3982
3983         score_tab[i+1]= best_score;
3984
3985         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3986         if(last_non_zero <= 27){
3987             for(; survivor_count; survivor_count--){
3988                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3989                     break;
3990             }
3991         }else{
3992             for(; survivor_count; survivor_count--){
3993                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3994                     break;
3995             }
3996         }
3997
3998         survivor[ survivor_count++ ]= i+1;
3999     }
4000
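    /* Formats without a LAST flag in their AC VLC (i.e. not H.263/H.261) signal the end
       of block with an explicit EOB code instead, so choose the truncation point here:
       scan every candidate end position and add roughly 2*lambda as an approximation of
       the EOB cost (see the FIXME below). */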
4001     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
4002         last_score= 256*256*256*120;
4003         for(i= survivor[0]; i<=last_non_zero + 1; i++){
4004             int score= score_tab[i];
4005             if(i) score += lambda*2; //FIXME: be more exact?
4006
4007             if(score < last_score){
4008                 last_score= score;
4009                 last_i= i;
4010                 last_level= level_tab[i];
4011                 last_run= run_tab[i];
4012             }
4013         }
4014     }
4015
4016     s->coded_score[n] = last_score;
4017
4018     dc= FFABS(block[0]);
4019     last_non_zero= last_i - 1;
4020     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4021
4022     if(last_non_zero < start_i)
4023         return last_non_zero;
4024
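    /* Special case: the surviving path codes only the coefficient at scan position 0
       (inter blocks, start_i == 0).  Re-evaluate every candidate level for that single
       coefficient, including dropping it altogether, against the cost of an empty block
       (distortion dc*dc). */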
4025     if(last_non_zero == 0 && start_i == 0){
4026         int best_level= 0;
4027         int best_score= dc * dc;
4028
4029         for(i=0; i<coeff_count[0]; i++){
4030             int level= coeff[i][0];
4031             int alevel= FFABS(level);
4032             int unquant_coeff, score, distortion;
4033
4034             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4035                     unquant_coeff= (alevel*qmul + qadd)>>3;
4036             }else{ //MPEG1
4037                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[0])) >> 4;
4038                     unquant_coeff =   (unquant_coeff - 1) | 1;
4039             }
4040             unquant_coeff = (unquant_coeff + 4) >> 3;
4041             unquant_coeff<<= 3 + 3;
4042
4043             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4044             level+=64;
4045             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4046             else                    score= distortion + esc_length*lambda;
4047
4048             if(score < best_score){
4049                 best_score= score;
4050                 best_level= level - 64;
4051             }
4052         }
4053         block[0]= best_level;
4054         s->coded_score[n] = best_score - dc*dc;
4055         if(best_level == 0) return -1;
4056         else                return last_non_zero;
4057     }
4058
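    /* Backtrack along the chosen trellis path: store the final (run,level) event, then
       walk run_tab[]/level_tab[] backwards, writing each level at its permuted scan
       position in block[]. */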
4059     i= last_i;
4060     av_assert2(last_level);
4061
4062     block[ perm_scantable[last_non_zero] ]= last_level;
4063     i -= last_run + 1;
4064
4065     for(; i>start_i; i -= run_tab[i] + 1){
4066         block[ perm_scantable[i-1] ]= level_tab[i];
4067     }
4068
4069     return last_non_zero;
4070 }
4071
4072 //#define REFINE_STATS 1
4073 static int16_t basis[64][64];
4074
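/* Precompute the 64 8x8 DCT basis functions, scaled by 1<<BASIS_SHIFT and with the
 * sqrt(0.5) normalisation on the DC row/column, stored in IDCT-permuted order so that
 * try_8x8basis()/add_8x8basis() can work directly on permuted coefficient indices. */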
4075 static void build_basis(uint8_t *perm){
4076     int i, j, x, y;
4077     emms_c();
4078     for(i=0; i<8; i++){
4079         for(j=0; j<8; j++){
4080             for(y=0; y<8; y++){
4081                 for(x=0; x<8; x++){
4082                     double s= 0.25*(1<<BASIS_SHIFT);
4083                     int index= 8*i + j;
4084                     int perm_index= perm[index];
4085                     if(i==0) s*= sqrt(0.5);
4086                     if(j==0) s*= sqrt(0.5);
4087                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4088                 }
4089             }
4090         }
4091     }
4092 }
4093
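/* Rate-distortion refinement of an already quantized block (used for quantizer noise
 * shaping): starting from the regular quantizer's output, repeatedly try +-1 changes on
 * single coefficients, score each candidate as (VLC bit delta)*lambda plus the change in
 * weighted reconstruction error (try_8x8basis() against rem[]), and apply the best
 * change per pass until nothing improves. */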
4094 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4095                         int16_t *block, int16_t *weight, int16_t *orig,
4096                         int n, int qscale){
4097     int16_t rem[64];
4098     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4099     const uint8_t *scantable= s->intra_scantable.scantable;
4100     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4101 //    unsigned int threshold1, threshold2;
4102 //    int bias=0;
4103     int run_tab[65];
4104     int prev_run=0;
4105     int prev_level=0;
4106     int qmul, qadd, start_i, last_non_zero, i, dc;
4107     uint8_t * length;
4108     uint8_t * last_length;
4109     int lambda;
4110     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4111 #ifdef REFINE_STATS
4112 static int count=0;
4113 static int after_last=0;
4114 static int to_zero=0;
4115 static int from_zero=0;
4116 static int raise=0;
4117 static int lower=0;
4118 static int messed_sign=0;
4119 #endif
4120
4121     if(basis[0][0] == 0)
4122         build_basis(s->idsp.idct_permutation);
4123
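    /* H.263/MPEG-4 style inverse quantization parameters used for the error estimates:
       |dequant| = qmul*|level| + qadd = 2*qscale*|level| + ((qscale-1)|1). */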
4124     qmul= qscale*2;
4125     qadd= (qscale-1)|1;
4126     if (s->mb_intra) {
4127         if (!s->h263_aic) {
4128             if (n < 4)
4129                 q = s->y_dc_scale;
4130             else
4131                 q = s->c_dc_scale;
4132         } else{
4133             /* For AIC we skip quant/dequant of INTRADC */
4134             q = 1;
4135             qadd=0;
4136         }
4137         q <<= RECON_SHIFT-3;
4138         /* note: block[0] is assumed to be positive */
4139         dc= block[0]*q;
4140 //        block[0] = (block[0] + (q >> 1)) / q;
4141         start_i = 1;
4142 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4143 //            bias= 1<<(QMAT_SHIFT-1);
4144         if (n > 3 && s->intra_chroma_ac_vlc_length) {
4145             length     = s->intra_chroma_ac_vlc_length;
4146             last_length= s->intra_chroma_ac_vlc_last_length;
4147         } else {
4148             length     = s->intra_ac_vlc_length;
4149             last_length= s->intra_ac_vlc_last_length;
4150         }
4151     } else {
4152         dc= 0;
4153         start_i = 0;
4154         length     = s->inter_ac_vlc_length;
4155         last_length= s->inter_ac_vlc_last_length;
4156     }
4157     last_non_zero = s->block_last_index[n];
4158
4159 #ifdef REFINE_STATS
4160 {START_TIMER
4161 #endif
4162     dc += (1<<(RECON_SHIFT-1));
4163     for(i=0; i<64; i++){
4164         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME: use orig directly instead of copying to rem[]
4165     }
4166 #ifdef REFINE_STATS
4167 STOP_TIMER("memset rem[]")}
4168 #endif
4169     sum=0;
4170     for(i=0; i<64; i++){
4171         int one= 36;
4172         int qns=4;
4173         int w;
4174
4175         w= FFABS(weight[i]) + qns*one;
4176         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4177
4178         weight[i] = w;
4179 //        w=weight[i] = (63*qns + (w/2)) / w;
4180
4181         av_assert2(w>0);
4182         av_assert2(w<(1<<6));
4183         sum += w*w;
4184     }
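    /* weight[] has been squeezed into 16..63 above; lambda is rescaled by the sum of the
       squared weights (and shifted back down) so that the bit-cost term stays roughly
       commensurate with the weighted SSD that try_8x8basis() returns. */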
4185     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4186 #ifdef REFINE_STATS
4187 {START_TIMER
4188 #endif
4189     run=0;
4190     rle_index=0;
4191     for(i=start_i; i<=last_non_zero; i++){
4192         int j= perm_scantable[i];
4193         const int level= block[j];
4194         int coeff;
4195
4196         if(level){
4197             if(level<0) coeff= qmul*level - qadd;
4198             else        coeff= qmul*level + qadd;
4199             run_tab[rle_index++]=run;
4200             run=0;
4201
4202             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4203         }else{
4204             run++;
4205         }
4206     }
4207 #ifdef REFINE_STATS
4208 if(last_non_zero>0){
4209 STOP_TIMER("init rem[]")
4210 }
4211 }
4212
4213 {START_TIMER
4214 #endif
4215     for(;;){
4216         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4217         int best_coeff=0;
4218         int best_change=0;
4219         int run2, best_unquant_change=0, analyze_gradient;
4220 #ifdef REFINE_STATS
4221 {START_TIMER
4222 #endif
4223         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4224
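        /* With enough coded coefficients (or noise shaping >= 3), compute d1[] = DCT of
           the weighted reconstruction error; its per-frequency sign is used further down
           to skip newly introduced coefficients whose sign matches the error, i.e.
           changes that would push the reconstruction further from the original. */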
4225         if(analyze_gradient){
4226 #ifdef REFINE_STATS
4227 {START_TIMER
4228 #endif
4229             for(i=0; i<64; i++){
4230                 int w= weight[i];
4231
4232                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4233             }
4234 #ifdef REFINE_STATS
4235 STOP_TIMER("rem*w*w")}
4236 {START_TIMER
4237 #endif
4238             s->fdsp.fdct(d1);
4239 #ifdef REFINE_STATS
4240 STOP_TIMER("dct")}
4241 #endif
4242         }
4243
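        /* For intra blocks (start_i != 0) also try +-1 on the quantized DC coefficient;
           its reconstruction simply uses the dc_scale factor q and must stay in 0..2047. */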
4244         if(start_i){
4245             const int level= block[0];
4246             int change, old_coeff;
4247
4248             av_assert2(s->mb_intra);
4249
4250             old_coeff= q*level;
4251
4252             for(change=-1; change<=1; change+=2){
4253                 int new_level= level + change;
4254                 int score, new_coeff;
4255
4256                 new_coeff= q*new_level;
4257                 if(new_coeff >= 2048 || new_coeff < 0)
4258                     continue;
4259
4260                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4261                                                   new_coeff - old_coeff);
4262                 if(score<best_score){
4263                     best_score= score;
4264                     best_coeff= 0;
4265                     best_change= change;
4266                     best_unquant_change= new_coeff - old_coeff;
4267                 }
4268             }
4269         }
4270
4271         run=0;
4272         rle_index=0;
4273         run2= run_tab[rle_index++];
4274         prev_level=0;
4275         prev_run=0;
4276
4277         for(i=start_i; i<64; i++){
4278             int j= perm_scantable[i];
4279             const int level= block[j];
4280             int change, old_coeff;
4281
4282             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4283                 break;
4284
4285             if(level){
4286                 if(level<0) old_coeff= qmul*level - qadd;
4287                 else        old_coeff= qmul*level + qadd;
4288                 run2= run_tab[rle_index++]; //FIXME: may index past the last entry
4289             }else{
4290                 old_coeff=0;
4291                 run2--;
4292                 av_assert2(run2>=0 || i >= last_non_zero );
4293             }
4294
4295             for(change=-1; change<=1; change+=2){
4296                 int new_level= level + change;
4297                 int score, new_coeff, unquant_change;
4298
4299                 score=0;
4300                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4301                    continue;
4302
4303                 if(new_level){
4304                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4305                     else            new_coeff= qmul*new_level + qadd;
4306                     if(new_coeff >= 2048 || new_coeff <= -2048)
4307                         continue;
4308                     //FIXME check for overflow
4309
4310                     if(level){
4311                         if(level < 63 && level > -63){
4312                             if(i < last_non_zero)
4313                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4314                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4315                             else
4316                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4317                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4318                         }
4319                     }else{
4320                         av_assert2(FFABS(new_level)==1);
4321
4322                         if(analyze_gradient){
4323                             int g= d1[ scantable[i] ];
4324                             if(g && (g^new_level) >= 0)
4325                                 continue;
4326                         }
4327
4328                         if(i < last_non_zero){
4329                             int next_i= i + run2 + 1;
4330                             int next_level= block[ perm_scantable[next_i] ] + 64;
4331
4332                             if(next_level&(~127))
4333                                 next_level= 0;
4334
4335                             if(next_i < last_non_zero)
4336                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4337                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4338                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4339                             else
4340                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4341                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4342                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4343                         }else{
4344                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4345                             if(prev_level){
4346                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4347                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4348                             }
4349                         }
4350                     }
4351                 }else{
4352                     new_coeff=0;
4353                     av_assert2(FFABS(level)==1);
4354
4355                     if(i < last_non_zero){
4356                         int next_i= i + run2 + 1;
4357                         int next_level= block[ perm_scantable[next_i] ] + 64;
4358
4359                         if(next_level&(~127))
4360                             next_level= 0;
4361
4362                         if(next_i < last_non_zero)
4363                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4364                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4365                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4366                         else
4367                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4368                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4369                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4370                     }else{
4371                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4372                         if(prev_level){
4373                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4374                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4375                         }
4376                     }
4377                 }
4378
4379                 score *= lambda;
4380
4381                 unquant_change= new_coeff - old_coeff;
4382                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4383
4384                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4385                                                    unquant_change);
4386                 if(score<best_score){
4387                     best_score= score;
4388                     best_coeff= i;
4389                     best_change= change;
4390                     best_unquant_change= unquant_change;
4391                 }
4392             }
4393             if(level){
4394                 prev_level= level + 64;
4395                 if(prev_level&(~127))
4396                     prev_level= 0;
4397                 prev_run= run;
4398                 run=0;
4399             }else{
4400                 run++;
4401             }
4402         }
4403 #ifdef REFINE_STATS
4404 STOP_TIMER("iterative step")}
4405 #endif
4406
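        /* Apply the best single +-1 change found in this pass, if any: update block[],
           adjust last_non_zero, rebuild run_tab[], and fold the corresponding basis
           function into rem[]; then iterate.  Stop as soon as no change helps. */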
4407         if(best_change){
4408             int j= perm_scantable[ best_coeff ];
4409
4410             block[j] += best_change;
4411
4412             if(best_coeff > last_non_zero){
4413                 last_non_zero= best_coeff;
4414                 av_assert2(block[j]);
4415 #ifdef REFINE_STATS
4416 after_last++;
4417 #endif
4418             }else{
4419 #ifdef REFINE_STATS
4420 if(block[j]){
4421     if(block[j] - best_change){
4422         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4423             raise++;
4424         }else{
4425             lower++;
4426         }
4427     }else{
4428         from_zero++;
4429     }
4430 }else{
4431     to_zero++;
4432 }
4433 #endif
4434                 for(; last_non_zero>=start_i; last_non_zero--){
4435                     if(block[perm_scantable[last_non_zero]])
4436                         break;
4437                 }
4438             }
4439 #ifdef REFINE_STATS
4440 count++;
4441 if(256*256*256*64 % count == 0){
4442     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4443 }
4444 #endif
4445             run=0;
4446             rle_index=0;
4447             for(i=start_i; i<=last_non_zero; i++){
4448                 int j= perm_scantable[i];
4449                 const int level= block[j];
4450
4451                 if(level){
4452                     run_tab[rle_index++]=run;
4453                     run=0;
4454                 }else{
4455                     run++;
4456                 }
4457             }
4458
4459             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4460         }else{
4461             break;
4462         }
4463     }
4464 #ifdef REFINE_STATS
4465 if(last_non_zero>0){
4466 STOP_TIMER("iterative search")
4467 }
4468 }
4469 #endif
4470
4471     return last_non_zero;
4472 }
4473
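/* Plain (non-trellis) quantizer: forward DCT, optional DCT-domain denoising, DC scaled by
 * the luma/chroma dc_scale, and AC coefficients quantized with the per-coefficient matrix
 * in qmat[] plus a rounding bias, zeroing everything inside the dead zone. */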
4474 int ff_dct_quantize_c(MpegEncContext *s,
4475                         int16_t *block, int n,
4476                         int qscale, int *overflow)
4477 {
4478     int i, j, level, last_non_zero, q, start_i;
4479     const int *qmat;
4480     const uint8_t *scantable= s->intra_scantable.scantable;
4481     int bias;
4482     int max=0;
4483     unsigned int threshold1, threshold2;
4484
4485     s->fdsp.fdct(block);
4486
4487     if(s->dct_error_sum)
4488         s->denoise_dct(s, block);
4489
4490     if (s->mb_intra) {
4491         if (!s->h263_aic) {
4492             if (n < 4)
4493                 q = s->y_dc_scale;
4494             else
4495                 q = s->c_dc_scale;
4496             q = q << 3;
4497         } else
4498             /* For AIC we skip quant/dequant of INTRADC */
4499             q = 1 << 3;
4500
4501         /* note: block[0] is assumed to be positive */
4502         block[0] = (block[0] + (q >> 1)) / q;
4503         start_i = 1;
4504         last_non_zero = 0;
4505         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4506         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4507     } else {
4508         start_i = 0;
4509         last_non_zero = -1;
4510         qmat = s->q_inter_matrix[qscale];
4511         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4512     }
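    /* Branchless dead-zone test used below: with threshold2 == 2*threshold1, the unsigned
       comparison (unsigned)(level+threshold1) > threshold2 is true exactly when
       level > (int)threshold1 or level < -(int)threshold1, i.e. when the scaled
       coefficient is large enough to survive quantization. */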
4513     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4514     threshold2= (threshold1<<1);
4515     for(i=63;i>=start_i;i--) {
4516         j = scantable[i];
4517         level = block[j] * qmat[j];
4518
4519         if(((unsigned)(level+threshold1))>threshold2){
4520             last_non_zero = i;
4521             break;
4522         }else{
4523             block[j]=0;
4524         }
4525     }
4526     for(i=start_i; i<=last_non_zero; i++) {
4527         j = scantable[i];
4528         level = block[j] * qmat[j];
4529
4530 //        if(   bias+level >= (1<<QMAT_SHIFT)
4531 //           || bias-level >= (1<<QMAT_SHIFT)){
4532         if(((unsigned)(level+threshold1))>threshold2){
4533             if(level>0){
4534                 level= (bias + level)>>QMAT_SHIFT;
4535                 block[j]= level;
4536             }else{
4537                 level= (bias - level)>>QMAT_SHIFT;
4538                 block[j]= -level;
4539             }
4540             max |=level;
4541         }else{
4542             block[j]=0;
4543         }
4544     }
4545     *overflow= s->max_qcoeff < max; //overflow might have happened
4546
4547     /* We need this permutation so that the coefficients end up in the order the IDCT expects; only the nonzero elements are permuted. */
4548     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4549         ff_block_permute(block, s->idsp.idct_permutation,
4550                          scantable, last_non_zero);
4551
4552     return last_non_zero;
4553 }
4554
4555 #define OFFSET(x) offsetof(MpegEncContext, x)
4556 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4557 static const AVOption h263_options[] = {
4558     { "obmc",         "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4559     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4560     { "mb_info",      "Emit macroblock info for RFC 2190 packetization; the parameter value is the maximum payload size.", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4561     FF_MPV_COMMON_OPTS
4562     { NULL },
4563 };
4564
4565 static const AVClass h263_class = {
4566     .class_name = "H.263 encoder",
4567     .item_name  = av_default_item_name,
4568     .option     = h263_options,
4569     .version    = LIBAVUTIL_VERSION_INT,
4570 };
4571
4572 AVCodec ff_h263_encoder = {
4573     .name           = "h263",
4574     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4575     .type           = AVMEDIA_TYPE_VIDEO,
4576     .id             = AV_CODEC_ID_H263,
4577     .priv_data_size = sizeof(MpegEncContext),
4578     .init           = ff_mpv_encode_init,
4579     .encode2        = ff_mpv_encode_picture,
4580     .close          = ff_mpv_encode_end,
4581     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4582     .priv_class     = &h263_class,
4583 };
4584
4585 static const AVOption h263p_options[] = {
4586     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4587     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4588     { "obmc",       "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4589     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4590     FF_MPV_COMMON_OPTS
4591     { NULL },
4592 };
4593 static const AVClass h263p_class = {
4594     .class_name = "H.263p encoder",
4595     .item_name  = av_default_item_name,
4596     .option     = h263p_options,
4597     .version    = LIBAVUTIL_VERSION_INT,
4598 };
4599
4600 AVCodec ff_h263p_encoder = {
4601     .name           = "h263p",
4602     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4603     .type           = AVMEDIA_TYPE_VIDEO,
4604     .id             = AV_CODEC_ID_H263P,
4605     .priv_data_size = sizeof(MpegEncContext),
4606     .init           = ff_mpv_encode_init,
4607     .encode2        = ff_mpv_encode_picture,
4608     .close          = ff_mpv_encode_end,
4609     .capabilities   = CODEC_CAP_SLICE_THREADS,
4610     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4611     .priv_class     = &h263p_class,
4612 };
4613
4614 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4615
4616 AVCodec ff_msmpeg4v2_encoder = {
4617     .name           = "msmpeg4v2",
4618     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4619     .type           = AVMEDIA_TYPE_VIDEO,
4620     .id             = AV_CODEC_ID_MSMPEG4V2,
4621     .priv_data_size = sizeof(MpegEncContext),
4622     .init           = ff_mpv_encode_init,
4623     .encode2        = ff_mpv_encode_picture,
4624     .close          = ff_mpv_encode_end,
4625     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4626     .priv_class     = &msmpeg4v2_class,
4627 };
4628
4629 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4630
4631 AVCodec ff_msmpeg4v3_encoder = {
4632     .name           = "msmpeg4",
4633     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4634     .type           = AVMEDIA_TYPE_VIDEO,
4635     .id             = AV_CODEC_ID_MSMPEG4V3,
4636     .priv_data_size = sizeof(MpegEncContext),
4637     .init           = ff_mpv_encode_init,
4638     .encode2        = ff_mpv_encode_picture,
4639     .close          = ff_mpv_encode_end,
4640     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4641     .priv_class     = &msmpeg4v3_class,
4642 };
4643
4644 FF_MPV_GENERIC_CLASS(wmv1)
4645
4646 AVCodec ff_wmv1_encoder = {
4647     .name           = "wmv1",
4648     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4649     .type           = AVMEDIA_TYPE_VIDEO,
4650     .id             = AV_CODEC_ID_WMV1,
4651     .priv_data_size = sizeof(MpegEncContext),
4652     .init           = ff_mpv_encode_init,
4653     .encode2        = ff_mpv_encode_picture,
4654     .close          = ff_mpv_encode_end,
4655     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4656     .priv_class     = &wmv1_class,
4657 };