git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include "libavutil/intmath.h"
  31 #include "libavutil/mathematics.h"
  32 #include "libavutil/opt.h"
  33 #include "avcodec.h"
  34 #include "dsputil.h"
  35 #include "mpegvideo.h"
  36 #include "mpegvideo_common.h"
  37 #include "h263.h"
  38 #include "mjpegenc.h"
  39 #include "msmpeg4.h"
  40 #include "faandct.h"
  41 #include "thread.h"
  42 #include "aandcttab.h"
  43 #include "flv.h"
  44 #include "mpeg4video.h"
  45 #include "internal.h"
  46 #include <limits.h>
  47 #include "sp5x.h"
  48
  49 //#undef NDEBUG
  50 //#include <assert.h>
  51
/* Prototypes for file-local (static) helpers, so that they can be referenced
 * ahead of their definitions in this file. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);

/* enable all paranoid tests for rounding, overflows, etc... */
//#define PARANOID

//#define DEBUG

/* Default tables shared by all encoder instances; MPV_encode_defaults()
 * points s->me.mv_penalty and s->fcode_tab at them.
 * default_fcode_tab is indexed by (mv + MAX_MV); MPV_encode_defaults() sets
 * the entries for mv in [-16, 15] to 1. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
  65
  66 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
  67                        uint16_t (*qmat16)[2][64],
  68                        const uint16_t *quant_matrix,
  69                        int bias, int qmin, int qmax, int intra)
  70 {
  71     int qscale;
  72     int shift = 0;
  73
  74     for (qscale = qmin; qscale <= qmax; qscale++) {
  75         int i;
  76         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
  77             dsp->fdct == ff_jpeg_fdct_islow_10
  78 #ifdef FAAN_POSTSCALE
  79             || dsp->fdct == ff_faandct
  80 #endif
  81             ) {
  82             for (i = 0; i < 64; i++) {
  83                 const int j = dsp->idct_permutation[i];
  84                 /* 16 <= qscale * quant_matrix[i] <= 7905
  85                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
  86                  *             19952 <=              x  <= 249205026
  87                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
  88                  *           3444240 >= (1 << 36) / (x) >= 275 */
  89
  90                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
  91                                         (qscale * quant_matrix[j]));
  92             }
  93         } else if (dsp->fdct == fdct_ifast
  94 #ifndef FAAN_POSTSCALE
  95                    || dsp->fdct == ff_faandct
  96 #endif
  97                    ) {
  98             for (i = 0; i < 64; i++) {
  99                 const int j = dsp->idct_permutation[i];
 100                 /* 16 <= qscale * quant_matrix[i] <= 7905
 101                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 102                  *             19952 <=              x  <= 249205026
 103                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 104                  *           3444240 >= (1 << 36) / (x) >= 275 */
 105
 106                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
 107                                         (ff_aanscales[i] * qscale * quant_matrix[j]));
 108             }
 109         } else {
 110             for (i = 0; i < 64; i++) {
 111                 const int j = dsp->idct_permutation[i];
 112                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 113                  * Assume x = qscale * quant_matrix[i]
 114                  * So             16 <=              x  <= 7905
 115                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 116                  * so          32768 >= (1 << 19) / (x) >= 67 */
 117                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
 118                                         (qscale * quant_matrix[j]));
 119                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 120                 //                    (qscale * quant_matrix[i]);
 121                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
 122                                        (qscale * quant_matrix[j]);
 123
 124                 if (qmat16[qscale][0][i] == 0 ||
 125                     qmat16[qscale][0][i] == 128 * 256)
 126                     qmat16[qscale][0][i] = 128 * 256 - 1;
 127                 qmat16[qscale][1][i] =
 128                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 129                                 qmat16[qscale][0][i]);
 130             }
 131         }
 132
 133         for (i = intra; i < 64; i++) {
 134             int64_t max = 8191;
 135             if (dsp->fdct == fdct_ifast
 136 #ifndef FAAN_POSTSCALE
 137                 || dsp->fdct == ff_faandct
 138 #endif
 139                ) {
 140                 max = (8191LL * ff_aanscales[i]) >> 14;
 141             }
 142             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 143                 shift++;
 144             }
 145         }
 146     }
 147     if (shift) {
 148         av_log(NULL, AV_LOG_INFO,
 149                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 150                QMAT_SHIFT - shift);
 151     }
 152 }
 153
 154 static inline void update_qscale(MpegEncContext *s)
 155 {
 156     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 157                 (FF_LAMBDA_SHIFT + 7);
 158     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 159
 160     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 161                  FF_LAMBDA_SHIFT;
 162 }
 163
 164 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 165 {
 166     int i;
 167
 168     if (matrix) {
 169         put_bits(pb, 1, 1);
 170         for (i = 0; i < 64; i++) {
 171             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 172         }
 173     } else
 174         put_bits(pb, 1, 0);
 175 }
 176
 177 /**
 178  * init s->current_picture.qscale_table from s->lambda_table
 179  */
 180 void ff_init_qscale_tab(MpegEncContext *s)
 181 {
 182     int8_t * const qscale_table = s->current_picture.f.qscale_table;
 183     int i;
 184
 185     for (i = 0; i < s->mb_num; i++) {
 186         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 187         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 188         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 189                                                   s->avctx->qmax);
 190     }
 191 }
 192
 193 static void copy_picture_attributes(MpegEncContext *s,
 194                                     AVFrame *dst,
 195                                     AVFrame *src)
 196 {
 197     int i;
 198
 199     dst->pict_type              = src->pict_type;
 200     dst->quality                = src->quality;
 201     dst->coded_picture_number   = src->coded_picture_number;
 202     dst->display_picture_number = src->display_picture_number;
 203     //dst->reference              = src->reference;
 204     dst->pts                    = src->pts;
 205     dst->interlaced_frame       = src->interlaced_frame;
 206     dst->top_field_first        = src->top_field_first;
 207
 208     if (s->avctx->me_threshold) {
 209         if (!src->motion_val[0])
 210             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
 211         if (!src->mb_type)
 212             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
 213         if (!src->ref_index[0])
 214             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
 215         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
 216             av_log(s->avctx, AV_LOG_ERROR,
 217                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
 218                    src->motion_subsample_log2, dst->motion_subsample_log2);
 219
 220         memcpy(dst->mb_type, src->mb_type,
 221                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
 222
 223         for (i = 0; i < 2; i++) {
 224             int stride = ((16 * s->mb_width ) >>
 225                           src->motion_subsample_log2) + 1;
 226             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
 227
 228             if (src->motion_val[i] &&
 229                 src->motion_val[i] != dst->motion_val[i]) {
 230                 memcpy(dst->motion_val[i], src->motion_val[i],
 231                        2 * stride * height * sizeof(int16_t));
 232             }
 233             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
 234                 memcpy(dst->ref_index[i], src->ref_index[i],
 235                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
 236             }
 237         }
 238     }
 239 }
 240
 241 static void update_duplicate_context_after_me(MpegEncContext *dst,
 242                                               MpegEncContext *src)
 243 {
 244 #define COPY(a) dst->a= src->a
 245     COPY(pict_type);
 246     COPY(current_picture);
 247     COPY(f_code);
 248     COPY(b_code);
 249     COPY(qscale);
 250     COPY(lambda);
 251     COPY(lambda2);
 252     COPY(picture_in_gop_number);
 253     COPY(gop_picture_number);
 254     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 255     COPY(progressive_frame);    // FIXME don't set in encode_header
 256     COPY(partitioned_frame);    // FIXME don't set in encode_header
 257 #undef COPY
 258 }
 259
 260 /**
 261  * Set the given MpegEncContext to defaults for encoding.
 262  * the changed fields will not depend upon the prior state of the MpegEncContext.
 263  */
 264 static void MPV_encode_defaults(MpegEncContext *s)
 265 {
 266     int i;
 267     MPV_common_defaults(s);
 268
 269     for (i = -16; i < 16; i++) {
 270         default_fcode_tab[i + MAX_MV] = 1;
 271     }
 272     s->me.mv_penalty = default_mv_penalty;
 273     s->fcode_tab     = default_fcode_tab;
 274 }
 275
 276 /* init video encoder */
 277 av_cold int MPV_encode_init(AVCodecContext *avctx)
 278 {
 279     MpegEncContext *s = avctx->priv_data;
 280     int i;
 281     int chroma_h_shift, chroma_v_shift;
 282
 283     MPV_encode_defaults(s);
 284
 285     switch (avctx->codec_id) {
 286     case CODEC_ID_MPEG2VIDEO:
 287         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
 288             avctx->pix_fmt != PIX_FMT_YUV422P) {
 289             av_log(avctx, AV_LOG_ERROR,
 290                    "only YUV420 and YUV422 are supported\n");
 291             return -1;
 292         }
 293         break;
 294     case CODEC_ID_LJPEG:
 295         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
 296             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
 297             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
 298             avctx->pix_fmt != PIX_FMT_BGR0     &&
 299             avctx->pix_fmt != PIX_FMT_BGRA     &&
 300             avctx->pix_fmt != PIX_FMT_BGR24    &&
 301             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
 302               avctx->pix_fmt != PIX_FMT_YUV422P &&
 303               avctx->pix_fmt != PIX_FMT_YUV444P) ||
 304              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
 305             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
 306             return -1;
 307         }
 308         break;
 309     case CODEC_ID_MJPEG:
 310     case CODEC_ID_AMV:
 311         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
 312             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
 313             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
 314               avctx->pix_fmt != PIX_FMT_YUV422P) ||
 315              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
 316             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 317             return -1;
 318         }
 319         break;
 320     default:
 321         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
 322             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 323             return -1;
 324         }
 325     }
 326
 327     switch (avctx->pix_fmt) {
 328     case PIX_FMT_YUVJ422P:
 329     case PIX_FMT_YUV422P:
 330         s->chroma_format = CHROMA_422;
 331         break;
 332     case PIX_FMT_YUVJ420P:
 333     case PIX_FMT_YUV420P:
 334     default:
 335         s->chroma_format = CHROMA_420;
 336         break;
 337     }
 338
 339     s->bit_rate = avctx->bit_rate;
 340     s->width    = avctx->width;
 341     s->height   = avctx->height;
 342     if (avctx->gop_size > 600 &&
 343         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 344         av_log(avctx, AV_LOG_WARNING,
 345                "keyframe interval too large!, reducing it from %d to %d\n",
 346                avctx->gop_size, 600);
 347         avctx->gop_size = 600;
 348     }
 349     s->gop_size     = avctx->gop_size;
 350     s->avctx        = avctx;
 351     s->flags        = avctx->flags;
 352     s->flags2       = avctx->flags2;
 353     s->max_b_frames = avctx->max_b_frames;
 354     s->codec_id     = avctx->codec->id;
 355     s->luma_elim_threshold   = avctx->luma_elim_threshold;
 356     s->chroma_elim_threshold = avctx->chroma_elim_threshold;
 357     s->strict_std_compliance = avctx->strict_std_compliance;
 358     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 359     s->mpeg_quant         = avctx->mpeg_quant;
 360     s->rtp_mode           = !!avctx->rtp_payload_size;
 361     s->intra_dc_precision = avctx->intra_dc_precision;
 362     s->user_specified_pts = AV_NOPTS_VALUE;
 363
 364     if (s->gop_size <= 1) {
 365         s->intra_only = 1;
 366         s->gop_size   = 12;
 367     } else {
 368         s->intra_only = 0;
 369     }
 370
 371     s->me_method = avctx->me_method;
 372
 373     /* Fixed QSCALE */
 374     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 375
 376     s->adaptive_quant = (s->avctx->lumi_masking ||
 377                          s->avctx->dark_masking ||
 378                          s->avctx->temporal_cplx_masking ||
 379                          s->avctx->spatial_cplx_masking  ||
 380                          s->avctx->p_masking      ||
 381                          s->avctx->border_masking ||
 382                          (s->flags & CODEC_FLAG_QP_RD)) &&
 383                         !s->fixed_qscale;
 384
 385     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
 386
 387     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
 388         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
 389         return -1;
 390     }
 391
 392     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 393         av_log(avctx, AV_LOG_INFO,
 394                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 395     }
 396
 397     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 398         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 399         return -1;
 400     }
 401
 402     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 403         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
 404         return -1;
 405     }
 406
 407     if (avctx->rc_max_rate &&
 408         avctx->rc_max_rate == avctx->bit_rate &&
 409         avctx->rc_max_rate != avctx->rc_min_rate) {
 410         av_log(avctx, AV_LOG_INFO,
 411                "impossible bitrate constraints, this will fail\n");
 412     }
 413
 414     if (avctx->rc_buffer_size &&
 415         avctx->bit_rate * (int64_t)avctx->time_base.num >
 416             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 417         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 418         return -1;
 419     }
 420
 421     if (!s->fixed_qscale &&
 422         avctx->bit_rate * av_q2d(avctx->time_base) >
 423             avctx->bit_rate_tolerance) {
 424         av_log(avctx, AV_LOG_ERROR,
 425                "bitrate tolerance too small for bitrate\n");
 426         return -1;
 427     }
 428
 429     if (s->avctx->rc_max_rate &&
 430         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 431         (s->codec_id == CODEC_ID_MPEG1VIDEO ||
 432          s->codec_id == CODEC_ID_MPEG2VIDEO) &&
 433         90000LL * (avctx->rc_buffer_size - 1) >
 434             s->avctx->rc_max_rate * 0xFFFFLL) {
 435         av_log(avctx, AV_LOG_INFO,
 436                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 437                "specified vbv buffer is too large for the given bitrate!\n");
 438     }
 439
 440     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != CODEC_ID_MPEG4 &&
 441         s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P &&
 442         s->codec_id != CODEC_ID_FLV1) {
 443         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 444         return -1;
 445     }
 446
 447     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 448         av_log(avctx, AV_LOG_ERROR,
 449                "OBMC is only supported with simple mb decision\n");
 450         return -1;
 451     }
 452
 453     if (s->quarter_sample && s->codec_id != CODEC_ID_MPEG4) {
 454         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 455         return -1;
 456     }
 457
 458     if (s->max_b_frames                    &&
 459         s->codec_id != CODEC_ID_MPEG4      &&
 460         s->codec_id != CODEC_ID_MPEG1VIDEO &&
 461         s->codec_id != CODEC_ID_MPEG2VIDEO) {
 462         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 463         return -1;
 464     }
 465
 466     if ((s->codec_id == CODEC_ID_MPEG4 ||
 467          s->codec_id == CODEC_ID_H263  ||
 468          s->codec_id == CODEC_ID_H263P) &&
 469         (avctx->sample_aspect_ratio.num > 255 ||
 470          avctx->sample_aspect_ratio.den > 255)) {
 471         av_log(avctx, AV_LOG_WARNING,
 472                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
 473                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 474         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
 475                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
 476     }
 477
 478     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 479         s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO) {
 480         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 481         return -1;
 482     }
 483
 484     // FIXME mpeg2 uses that too
 485     if (s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4) {
 486         av_log(avctx, AV_LOG_ERROR,
 487                "mpeg2 style quantization not supported by codec\n");
 488         return -1;
 489     }
 490
 491     if ((s->flags & CODEC_FLAG_CBP_RD) && !avctx->trellis) {
 492         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 493         return -1;
 494     }
 495
 496     if ((s->flags & CODEC_FLAG_QP_RD) &&
 497         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 498         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 499         return -1;
 500     }
 501
 502     if (s->avctx->scenechange_threshold < 1000000000 &&
 503         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
 504         av_log(avctx, AV_LOG_ERROR,
 505                "closed gop with scene change detection are not supported yet, "
 506                "set threshold to 1000000000\n");
 507         return -1;
 508     }
 509
 510     if (s->flags & CODEC_FLAG_LOW_DELAY) {
 511         if (s->codec_id != CODEC_ID_MPEG2VIDEO) {
 512             av_log(avctx, AV_LOG_ERROR,
 513                   "low delay forcing is only available for mpeg2\n");
 514             return -1;
 515         }
 516         if (s->max_b_frames != 0) {
 517             av_log(avctx, AV_LOG_ERROR,
 518                    "b frames cannot be used with low delay\n");
 519             return -1;
 520         }
 521     }
 522
 523     if (s->q_scale_type == 1) {
 524         if (avctx->qmax > 12) {
 525             av_log(avctx, AV_LOG_ERROR,
 526                    "non linear quant only supports qmax <= 12 currently\n");
 527             return -1;
 528         }
 529     }
 530
 531     if (s->avctx->thread_count > 1         &&
 532         s->codec_id != CODEC_ID_MPEG4      &&
 533         s->codec_id != CODEC_ID_MPEG1VIDEO &&
 534         s->codec_id != CODEC_ID_MPEG2VIDEO &&
 535         (s->codec_id != CODEC_ID_H263P)) {
 536         av_log(avctx, AV_LOG_ERROR,
 537                "multi threaded encoding not supported by codec\n");
 538         return -1;
 539     }
 540
 541     if (s->avctx->thread_count < 1) {
 542         av_log(avctx, AV_LOG_ERROR,
 543                "automatic thread number detection not supported by codec, "
 544                "patch welcome\n");
 545         return -1;
 546     }
 547
 548     if (s->avctx->thread_count > 1)
 549         s->rtp_mode = 1;
 550
 551     if (!avctx->time_base.den || !avctx->time_base.num) {
 552         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 553         return -1;
 554     }
 555
 556     i = (INT_MAX / 2 + 128) >> 8;
 557     if (avctx->me_threshold >= i) {
 558         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
 559                i - 1);
 560         return -1;
 561     }
 562     if (avctx->mb_threshold >= i) {
 563         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
 564                i - 1);
 565         return -1;
 566     }
 567
 568     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 569         av_log(avctx, AV_LOG_INFO,
 570                "notice: b_frame_strategy only affects the first pass\n");
 571         avctx->b_frame_strategy = 0;
 572     }
 573
 574     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 575     if (i > 1) {
 576         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 577         avctx->time_base.den /= i;
 578         avctx->time_base.num /= i;
 579         //return -1;
 580     }
 581
 582     if (s->mpeg_quant || s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || s->codec_id == CODEC_ID_MJPEG || s->codec_id==CODEC_ID_AMV) {
 583         // (a + x * 3 / 8) / x
 584         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 585         s->inter_quant_bias = 0;
 586     } else {
 587         s->intra_quant_bias = 0;
 588         // (a - x / 4) / x
 589         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 590     }
 591
 592     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 593         s->intra_quant_bias = avctx->intra_quant_bias;
 594     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 595         s->inter_quant_bias = avctx->inter_quant_bias;
 596
 597     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
 598
 599     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
 600                                   &chroma_v_shift);
 601
 602     if (avctx->codec_id == CODEC_ID_MPEG4 &&
 603         s->avctx->time_base.den > (1 << 16) - 1) {
 604         av_log(avctx, AV_LOG_ERROR,
 605                "timebase %d/%d not supported by MPEG 4 standard, "
 606                "the maximum admitted value for the timebase denominator "
 607                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 608                (1 << 16) - 1);
 609         return -1;
 610     }
 611     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 612
 613     switch (avctx->codec->id) {
 614     case CODEC_ID_MPEG1VIDEO:
 615         s->out_format = FMT_MPEG1;
 616         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 617         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 618         break;
 619     case CODEC_ID_MPEG2VIDEO:
 620         s->out_format = FMT_MPEG1;
 621         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 622         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 623         s->rtp_mode   = 1;
 624         break;
 625     case CODEC_ID_LJPEG:
 626     case CODEC_ID_MJPEG:
 627     case CODEC_ID_AMV:
 628         s->out_format = FMT_MJPEG;
 629         s->intra_only = 1; /* force intra only for jpeg */
 630         if (avctx->codec->id == CODEC_ID_LJPEG &&
 631             (avctx->pix_fmt == PIX_FMT_BGR0
 632              || s->avctx->pix_fmt == PIX_FMT_BGRA
 633              || s->avctx->pix_fmt == PIX_FMT_BGR24)) {
 634             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
 635             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
 636             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
 637         } else {
 638             s->mjpeg_vsample[0] = 2;
 639             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
 640             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
 641             s->mjpeg_hsample[0] = 2;
 642             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
 643             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
 644         }
 645         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
 646             ff_mjpeg_encode_init(s) < 0)
 647             return -1;
 648         avctx->delay = 0;
 649         s->low_delay = 1;
 650         break;
 651     case CODEC_ID_H261:
 652         if (!CONFIG_H261_ENCODER)
 653             return -1;
 654         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 655             av_log(avctx, AV_LOG_ERROR,
 656                    "The specified picture size of %dx%d is not valid for the "
 657                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 658                     s->width, s->height);
 659             return -1;
 660         }
 661         s->out_format = FMT_H261;
 662         avctx->delay  = 0;
 663         s->low_delay  = 1;
 664         break;
 665     case CODEC_ID_H263:
 666         if (!CONFIG_H263_ENCODER)
 667             return -1;
 668         if (ff_match_2uint16(h263_format, FF_ARRAY_ELEMS(h263_format),
 669                              s->width, s->height) == 8) {
 670             av_log(avctx, AV_LOG_ERROR,
 671                    "The specified picture size of %dx%d is not valid for "
 672                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 673                    "352x288, 704x576, and 1408x1152. "
 674                    "Try H.263+.\n", s->width, s->height);
 675             return -1;
 676         }
 677         s->out_format = FMT_H263;
 678         avctx->delay  = 0;
 679         s->low_delay  = 1;
 680         break;
 681     case CODEC_ID_H263P:
 682         s->out_format = FMT_H263;
 683         s->h263_plus  = 1;
 684         /* Fx */
 685         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 686         s->modified_quant  = s->h263_aic;
 687         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 688         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 689
 690         /* /Fx */
 691         /* These are just to be sure */
 692         avctx->delay = 0;
 693         s->low_delay = 1;
 694         break;
 695     case CODEC_ID_FLV1:
 696         s->out_format      = FMT_H263;
 697         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 698         s->unrestricted_mv = 1;
 699         s->rtp_mode  = 0; /* don't allow GOB */
 700         avctx->delay = 0;
 701         s->low_delay = 1;
 702         break;
 703     case CODEC_ID_RV10:
 704         s->out_format = FMT_H263;
 705         avctx->delay  = 0;
 706         s->low_delay  = 1;
 707         break;
 708     case CODEC_ID_RV20:
 709         s->out_format      = FMT_H263;
 710         avctx->delay       = 0;
 711         s->low_delay       = 1;
 712         s->modified_quant  = 1;
 713         s->h263_aic        = 1;
 714         s->h263_plus       = 1;
 715         s->loop_filter     = 1;
 716         s->unrestricted_mv = 0;
 717         break;
 718     case CODEC_ID_MPEG4:
 719         s->out_format      = FMT_H263;
 720         s->h263_pred       = 1;
 721         s->unrestricted_mv = 1;
 722         s->low_delay       = s->max_b_frames ? 0 : 1;
 723         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 724         break;
 725     case CODEC_ID_MSMPEG4V2:
 726         s->out_format      = FMT_H263;
 727         s->h263_pred       = 1;
 728         s->unrestricted_mv = 1;
 729         s->msmpeg4_version = 2;
 730         avctx->delay       = 0;
 731         s->low_delay       = 1;
 732         break;
 733     case CODEC_ID_MSMPEG4V3:
 734         s->out_format        = FMT_H263;
 735         s->h263_pred         = 1;
 736         s->unrestricted_mv   = 1;
 737         s->msmpeg4_version   = 3;
 738         s->flipflop_rounding = 1;
 739         avctx->delay         = 0;
 740         s->low_delay         = 1;
 741         break;
 742     case CODEC_ID_WMV1:
 743         s->out_format        = FMT_H263;
 744         s->h263_pred         = 1;
 745         s->unrestricted_mv   = 1;
 746         s->msmpeg4_version   = 4;
 747         s->flipflop_rounding = 1;
 748         avctx->delay         = 0;
 749         s->low_delay         = 1;
 750         break;
 751     case CODEC_ID_WMV2:
 752         s->out_format        = FMT_H263;
 753         s->h263_pred         = 1;
 754         s->unrestricted_mv   = 1;
 755         s->msmpeg4_version   = 5;
 756         s->flipflop_rounding = 1;
 757         avctx->delay         = 0;
 758         s->low_delay         = 1;
 759         break;
 760     default:
 761         return -1;
 762     }
 763
 764     avctx->has_b_frames = !s->low_delay;
 765
 766     s->encoding = 1;
 767
 768     s->progressive_frame    =
 769     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 770                                                 CODEC_FLAG_INTERLACED_ME) ||
 771                                 s->alternate_scan);
 772
 773     /* init */
 774     if (MPV_common_init(s) < 0)
 775         return -1;
 776
 777     if (!s->dct_quantize)
 778         s->dct_quantize = dct_quantize_c;
 779     if (!s->denoise_dct)
 780         s->denoise_dct  = denoise_dct_c;
 781     s->fast_dct_quantize = s->dct_quantize;
 782     if (avctx->trellis)
 783         s->dct_quantize  = dct_quantize_trellis_c;
 784
 785     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 786         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 787
 788     s->quant_precision = 5;
 789
 790     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
 791     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
 792
 793     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 794         ff_h261_encode_init(s);
 795     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 796         h263_encode_init(s);
 797     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 798         ff_msmpeg4_encode_init(s);
 799     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 800         && s->out_format == FMT_MPEG1)
 801         ff_mpeg1_encode_init(s);
 802
 803     /* init q matrix */
 804     for (i = 0; i < 64; i++) {
 805         int j = s->dsp.idct_permutation[i];
 806         if (CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4 &&
 807             s->mpeg_quant) {
 808             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 809             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 810         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 811             s->intra_matrix[j] =
 812             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 813         } else {
 814             /* mpeg1/2 */
 815             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 816             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 817         }
 818         if (s->avctx->intra_matrix)
 819             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 820         if (s->avctx->inter_matrix)
 821             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 822     }
 823
 824     /* precompute matrix */
 825     /* for mjpeg, we do include qscale in the matrix */
 826     if (s->out_format != FMT_MJPEG) {
 827         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
 828                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 829                           31, 1);
 830         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
 831                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 832                           31, 0);
 833     }
 834
 835     if (ff_rate_control_init(s) < 0)
 836         return -1;
 837
 838     return 0;
 839 }
 840
 841 av_cold int MPV_encode_end(AVCodecContext *avctx)
 842 {
 843     MpegEncContext *s = avctx->priv_data;
 844
 845     ff_rate_control_uninit(s);
 846
 847     MPV_common_end(s);
 848     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
 849         s->out_format == FMT_MJPEG)
 850         ff_mjpeg_encode_close(s);
 851
 852     av_freep(&avctx->extradata);
 853
 854     return 0;
 855 }
 856
 857 static int get_sae(uint8_t *src, int ref, int stride)
 858 {
 859     int x,y;
 860     int acc = 0;
 861
 862     for (y = 0; y < 16; y++) {
 863         for (x = 0; x < 16; x++) {
 864             acc += FFABS(src[x + y * stride] - ref);
 865         }
 866     }
 867
 868     return acc;
 869 }
 870
 871 static int get_intra_count(MpegEncContext *s, uint8_t *src,
 872                            uint8_t *ref, int stride)
 873 {
 874     int x, y, w, h;
 875     int acc = 0;
 876
 877     w = s->width  & ~15;
 878     h = s->height & ~15;
 879
 880     for (y = 0; y < h; y += 16) {
 881         for (x = 0; x < w; x += 16) {
 882             int offset = x + y * stride;
 883             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
 884                                      16);
 885             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
 886             int sae  = get_sae(src + offset, mean, stride);
 887
 888             acc += sae + 500 < sad;
 889         }
 890     }
 891     return acc;
 892 }
 893
 894
 895 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
 896 {
 897     AVFrame *pic = NULL;
 898     int64_t pts;
 899     int i;
 900     const int encoding_delay = s->max_b_frames;
 901     int direct = 1;
 902
 903     if (pic_arg) {
 904         pts = pic_arg->pts;
 905         pic_arg->display_picture_number = s->input_picture_number++;
 906
 907         if (pts != AV_NOPTS_VALUE) {
 908             if (s->user_specified_pts != AV_NOPTS_VALUE) {
 909                 int64_t time = pts;
 910                 int64_t last = s->user_specified_pts;
 911
 912                 if (time <= last) {
 913                     av_log(s->avctx, AV_LOG_ERROR,
 914                            "Error, Invalid timestamp=%"PRId64", "
 915                            "last=%"PRId64"\n", pts, s->user_specified_pts);
 916                     return -1;
 917                 }
 918             }
 919             s->user_specified_pts = pts;
 920         } else {
 921             if (s->user_specified_pts != AV_NOPTS_VALUE) {
 922                 s->user_specified_pts =
 923                 pts = s->user_specified_pts + 1;
 924                 av_log(s->avctx, AV_LOG_INFO,
 925                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
 926                        pts);
 927             } else {
 928                 pts = pic_arg->display_picture_number;
 929             }
 930         }
 931     }
 932
 933   if (pic_arg) {
 934     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
 935         direct = 0;
 936     if (pic_arg->linesize[0] != s->linesize)
 937         direct = 0;
 938     if (pic_arg->linesize[1] != s->uvlinesize)
 939         direct = 0;
 940     if (pic_arg->linesize[2] != s->uvlinesize)
 941         direct = 0;
 942
 943     //av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0],
 944     //       pic_arg->linesize[1], s->linesize, s->uvlinesize);
 945
 946     if (direct) {
 947         i = ff_find_unused_picture(s, 1);
 948         if (i < 0)
 949             return i;
 950
 951         pic = (AVFrame *) &s->picture[i];
 952         pic->reference = 3;
 953
 954         for (i = 0; i < 4; i++) {
 955             pic->data[i]     = pic_arg->data[i];
 956             pic->linesize[i] = pic_arg->linesize[i];
 957         }
 958         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
 959             return -1;
 960         }
 961     } else {
 962         i = ff_find_unused_picture(s, 0);
 963         if (i < 0)
 964             return i;
 965
 966         pic = (AVFrame *) &s->picture[i];
 967         pic->reference = 3;
 968
 969         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
 970             return -1;
 971         }
 972
 973         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
 974             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
 975             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
 976             // empty
 977         } else {
 978             int h_chroma_shift, v_chroma_shift;
 979             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
 980                                           &v_chroma_shift);
 981
 982             for (i = 0; i < 3; i++) {
 983                 int src_stride = pic_arg->linesize[i];
 984                 int dst_stride = i ? s->uvlinesize : s->linesize;
 985                 int h_shift = i ? h_chroma_shift : 0;
 986                 int v_shift = i ? v_chroma_shift : 0;
 987                 int w = s->width  >> h_shift;
 988                 int h = s->height >> v_shift;
 989                 uint8_t *src = pic_arg->data[i];
 990                 uint8_t *dst = pic->data[i];
 991
 992                 if(s->codec_id == CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)){
 993                     h= ((s->height+15)/16*16)>>v_shift;
 994                 }
 995
 996                 if (!s->avctx->rc_buffer_size)
 997                     dst += INPLACE_OFFSET;
 998
 999                 if (src_stride == dst_stride)
1000                     memcpy(dst, src, src_stride * h);
1001                 else {
1002                     while (h--) {
1003                         memcpy(dst, src, w);
1004                         dst += dst_stride;
1005                         src += src_stride;
1006                     }
1007                 }
1008             }
1009         }
1010     }
1011     copy_picture_attributes(s, pic, pic_arg);
1012     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1013   }
1014
1015     /* shift buffer entries */
1016     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1017         s->input_picture[i - 1] = s->input_picture[i];
1018
1019     s->input_picture[encoding_delay] = (Picture*) pic;
1020
1021     return 0;
1022 }
1023
1024 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1025 {
1026     int x, y, plane;
1027     int score = 0;
1028     int64_t score64 = 0;
1029
1030     for (plane = 0; plane < 3; plane++) {
1031         const int stride = p->f.linesize[plane];
1032         const int bw = plane ? 1 : 2;
1033         for (y = 0; y < s->mb_height * bw; y++) {
1034             for (x = 0; x < s->mb_width * bw; x++) {
1035                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1036                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1037                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1038                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1039
1040                 switch (s->avctx->frame_skip_exp) {
1041                 case 0: score    =  FFMAX(score, v);          break;
1042                 case 1: score   += FFABS(v);                  break;
1043                 case 2: score   += v * v;                     break;
1044                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1045                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1046                 }
1047             }
1048         }
1049     }
1050
1051     if (score)
1052         score64 = score;
1053
1054     if (score64 < s->avctx->frame_skip_threshold)
1055         return 1;
1056     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1057         return 1;
1058     return 0;
1059 }
1060
1061 static int estimate_best_b_count(MpegEncContext *s)
1062 {
1063     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1064     AVCodecContext *c = avcodec_alloc_context3(NULL);
1065     AVFrame input[FF_MAX_B_FRAMES + 2];
1066     const int scale = s->avctx->brd_scale;
1067     int i, j, out_size, p_lambda, b_lambda, lambda2;
1068     int outbuf_size  = s->width * s->height; // FIXME
1069     uint8_t *outbuf  = av_malloc(outbuf_size);
1070     int64_t best_rd  = INT64_MAX;
1071     int best_b_count = -1;
1072
1073     assert(scale >= 0 && scale <= 3);
1074
1075     //emms_c();
1076     //s->next_picture_ptr->quality;
1077     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1078     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1079     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1080     if (!b_lambda) // FIXME we should do this somewhere else
1081         b_lambda = p_lambda;
1082     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1083                FF_LAMBDA_SHIFT;
1084
1085     c->width        = s->width  >> scale;
1086     c->height       = s->height >> scale;
1087     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1088                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1089     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1090     c->mb_decision  = s->avctx->mb_decision;
1091     c->me_cmp       = s->avctx->me_cmp;
1092     c->mb_cmp       = s->avctx->mb_cmp;
1093     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1094     c->pix_fmt      = PIX_FMT_YUV420P;
1095     c->time_base    = s->avctx->time_base;
1096     c->max_b_frames = s->max_b_frames;
1097
1098     if (avcodec_open2(c, codec, NULL) < 0)
1099         return -1;
1100
1101     for (i = 0; i < s->max_b_frames + 2; i++) {
1102         int ysize = c->width * c->height;
1103         int csize = (c->width / 2) * (c->height / 2);
1104         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1105                                                 s->next_picture_ptr;
1106
1107         avcodec_get_frame_defaults(&input[i]);
1108         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1109         input[i].data[1]     = input[i].data[0] + ysize;
1110         input[i].data[2]     = input[i].data[1] + csize;
1111         input[i].linesize[0] = c->width;
1112         input[i].linesize[1] =
1113         input[i].linesize[2] = c->width / 2;
1114
1115         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1116             pre_input = *pre_input_ptr;
1117
1118             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1119                 pre_input.f.data[0] += INPLACE_OFFSET;
1120                 pre_input.f.data[1] += INPLACE_OFFSET;
1121                 pre_input.f.data[2] += INPLACE_OFFSET;
1122             }
1123
1124             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1125                                  pre_input.f.data[0], pre_input.f.linesize[0],
1126                                  c->width,      c->height);
1127             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1128                                  pre_input.f.data[1], pre_input.f.linesize[1],
1129                                  c->width >> 1, c->height >> 1);
1130             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1131                                  pre_input.f.data[2], pre_input.f.linesize[2],
1132                                  c->width >> 1, c->height >> 1);
1133         }
1134     }
1135
1136     for (j = 0; j < s->max_b_frames + 1; j++) {
1137         int64_t rd = 0;
1138
1139         if (!s->input_picture[j])
1140             break;
1141
1142         c->error[0] = c->error[1] = c->error[2] = 0;
1143
1144         input[0].pict_type = AV_PICTURE_TYPE_I;
1145         input[0].quality   = 1 * FF_QP2LAMBDA;
1146         out_size           = avcodec_encode_video(c, outbuf,
1147                                                   outbuf_size, &input[0]);
1148         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1149
1150         for (i = 0; i < s->max_b_frames + 1; i++) {
1151             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1152
1153             input[i + 1].pict_type = is_p ?
1154                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1155             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1156             out_size = avcodec_encode_video(c, outbuf, outbuf_size,
1157                                             &input[i + 1]);
1158             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1159         }
1160
1161         /* get the delayed frames */
1162         while (out_size) {
1163             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
1164             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1165         }
1166
1167         rd += c->error[0] + c->error[1] + c->error[2];
1168
1169         if (rd < best_rd) {
1170             best_rd = rd;
1171             best_b_count = j;
1172         }
1173     }
1174
1175     av_freep(&outbuf);
1176     avcodec_close(c);
1177     av_freep(&c);
1178
1179     for (i = 0; i < s->max_b_frames + 2; i++) {
1180         av_freep(&input[i].data[0]);
1181     }
1182
1183     return best_b_count;
1184 }
1185
1186 static int select_input_picture(MpegEncContext *s)
1187 {
1188     int i;
1189
1190     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1191         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1192     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1193
1194     /* set next picture type & ordering */
1195     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1196         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1197             s->next_picture_ptr == NULL || s->intra_only) {
1198             s->reordered_input_picture[0] = s->input_picture[0];
1199             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1200             s->reordered_input_picture[0]->f.coded_picture_number =
1201                 s->coded_picture_number++;
1202         } else {
1203             int b_frames;
1204
1205             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1206                 if (s->picture_in_gop_number < s->gop_size &&
1207                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1208                     // FIXME check that te gop check above is +-1 correct
1209                     //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n",
1210                     //       s->input_picture[0]->f.data[0],
1211                     //       s->input_picture[0]->pts);
1212
1213                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1214                         for (i = 0; i < 4; i++)
1215                             s->input_picture[0]->f.data[i] = NULL;
1216                         s->input_picture[0]->f.type = 0;
1217                     } else {
1218                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1219                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1220
1221                         s->avctx->release_buffer(s->avctx,
1222                                                  (AVFrame *) s->input_picture[0]);
1223                     }
1224
1225                     emms_c();
1226                     ff_vbv_update(s, 0);
1227
1228                     goto no_output_pic;
1229                 }
1230             }
1231
1232             if (s->flags & CODEC_FLAG_PASS2) {
1233                 for (i = 0; i < s->max_b_frames + 1; i++) {
1234                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1235
1236                     if (pict_num >= s->rc_context.num_entries)
1237                         break;
1238                     if (!s->input_picture[i]) {
1239                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1240                         break;
1241                     }
1242
1243                     s->input_picture[i]->f.pict_type =
1244                         s->rc_context.entry[pict_num].new_pict_type;
1245                 }
1246             }
1247
1248             if (s->avctx->b_frame_strategy == 0) {
1249                 b_frames = s->max_b_frames;
1250                 while (b_frames && !s->input_picture[b_frames])
1251                     b_frames--;
1252             } else if (s->avctx->b_frame_strategy == 1) {
1253                 for (i = 1; i < s->max_b_frames + 1; i++) {
1254                     if (s->input_picture[i] &&
1255                         s->input_picture[i]->b_frame_score == 0) {
1256                         s->input_picture[i]->b_frame_score =
1257                             get_intra_count(s,
1258                                             s->input_picture[i    ]->f.data[0],
1259                                             s->input_picture[i - 1]->f.data[0],
1260                                             s->linesize) + 1;
1261                     }
1262                 }
1263                 for (i = 0; i < s->max_b_frames + 1; i++) {
1264                     if (s->input_picture[i] == NULL ||
1265                         s->input_picture[i]->b_frame_score - 1 >
1266                             s->mb_num / s->avctx->b_sensitivity)
1267                         break;
1268                 }
1269
1270                 b_frames = FFMAX(0, i - 1);
1271
1272                 /* reset scores */
1273                 for (i = 0; i < b_frames + 1; i++) {
1274                     s->input_picture[i]->b_frame_score = 0;
1275                 }
1276             } else if (s->avctx->b_frame_strategy == 2) {
1277                 b_frames = estimate_best_b_count(s);
1278             } else {
1279                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1280                 b_frames = 0;
1281             }
1282
1283             emms_c();
1284             //static int b_count = 0;
1285             //b_count += b_frames;
1286             //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1287
1288             for (i = b_frames - 1; i >= 0; i--) {
1289                 int type = s->input_picture[i]->f.pict_type;
1290                 if (type && type != AV_PICTURE_TYPE_B)
1291                     b_frames = i;
1292             }
1293             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1294                 b_frames == s->max_b_frames) {
1295                 av_log(s->avctx, AV_LOG_ERROR,
1296                        "warning, too many b frames in a row\n");
1297             }
1298
1299             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1300                 if ((s->flags2 & CODEC_FLAG2_STRICT_GOP) &&
1301                     s->gop_size > s->picture_in_gop_number) {
1302                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1303                 } else {
1304                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1305                         b_frames = 0;
1306                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1307                 }
1308             }
1309
1310             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1311                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1312                 b_frames--;
1313
1314             s->reordered_input_picture[0] = s->input_picture[b_frames];
1315             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1316                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1317             s->reordered_input_picture[0]->f.coded_picture_number =
1318                 s->coded_picture_number++;
1319             for (i = 0; i < b_frames; i++) {
1320                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1321                 s->reordered_input_picture[i + 1]->f.pict_type =
1322                     AV_PICTURE_TYPE_B;
1323                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1324                     s->coded_picture_number++;
1325             }
1326         }
1327     }
1328 no_output_pic:
1329     if (s->reordered_input_picture[0]) {
1330         s->reordered_input_picture[0]->f.reference =
1331            s->reordered_input_picture[0]->f.pict_type !=
1332                AV_PICTURE_TYPE_B ? 3 : 0;
1333
1334         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1335
1336         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1337             s->avctx->rc_buffer_size) {
1338             // input is a shared pix, so we can't modifiy it -> alloc a new
1339             // one & ensure that the shared one is reuseable
1340
1341             Picture *pic;
1342             int i = ff_find_unused_picture(s, 0);
1343             if (i < 0)
1344                 return i;
1345             pic = &s->picture[i];
1346
1347             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1348             if (ff_alloc_picture(s, pic, 0) < 0) {
1349                 return -1;
1350             }
1351
1352             /* mark us unused / free shared pic */
1353             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1354                 s->avctx->release_buffer(s->avctx,
1355                                          (AVFrame *) s->reordered_input_picture[0]);
1356             for (i = 0; i < 4; i++)
1357                 s->reordered_input_picture[0]->f.data[i] = NULL;
1358             s->reordered_input_picture[0]->f.type = 0;
1359
1360             copy_picture_attributes(s, (AVFrame *) pic,
1361                                     (AVFrame *) s->reordered_input_picture[0]);
1362
1363             s->current_picture_ptr = pic;
1364         } else {
1365             // input is not a shared pix -> reuse buffer for current_pix
1366
1367             assert(s->reordered_input_picture[0]->f.type ==
1368                        FF_BUFFER_TYPE_USER ||
1369                    s->reordered_input_picture[0]->f.type ==
1370                        FF_BUFFER_TYPE_INTERNAL);
1371
1372             s->current_picture_ptr = s->reordered_input_picture[0];
1373             for (i = 0; i < 4; i++) {
1374                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1375             }
1376         }
1377         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1378
1379         s->picture_number = s->new_picture.f.display_picture_number;
1380         //printf("dpn:%d\n", s->picture_number);
1381     } else {
1382         memset(&s->new_picture, 0, sizeof(Picture));
1383     }
1384     return 0;
1385 }
1386
1387 int MPV_encode_picture(AVCodecContext *avctx,
1388                        unsigned char *buf, int buf_size, void *data)
1389 {
1390     MpegEncContext *s = avctx->priv_data;
1391     AVFrame *pic_arg  = data;
1392     int i, stuffing_count;
1393     int context_count = s->slice_context_count;
1394
1395     for (i = 0; i < context_count; i++) {
1396         int start_y = s->thread_context[i]->start_mb_y;
1397         int   end_y = s->thread_context[i]->  end_mb_y;
1398         int h       = s->mb_height;
1399         uint8_t *start = buf + (size_t)(((int64_t) buf_size) * start_y / h);
1400         uint8_t *end   = buf + (size_t)(((int64_t) buf_size) *   end_y / h);
1401
1402         init_put_bits(&s->thread_context[i]->pb, start, end - start);
1403     }
1404
1405     s->picture_in_gop_number++;
1406
1407     if (load_input_picture(s, pic_arg) < 0)
1408         return -1;
1409
1410     if (select_input_picture(s) < 0) {
1411         return -1;
1412     }
1413
1414     /* output? */
1415     if (s->new_picture.f.data[0]) {
1416         s->pict_type = s->new_picture.f.pict_type;
1417         //emms_c();
1418         //printf("qs:%f %f %d\n", s->new_picture.quality,
1419         //       s->current_picture.quality, s->qscale);
1420         MPV_frame_start(s, avctx);
1421 vbv_retry:
1422         if (encode_picture(s, s->picture_number) < 0)
1423             return -1;
1424
1425         avctx->header_bits = s->header_bits;
1426         avctx->mv_bits     = s->mv_bits;
1427         avctx->misc_bits   = s->misc_bits;
1428         avctx->i_tex_bits  = s->i_tex_bits;
1429         avctx->p_tex_bits  = s->p_tex_bits;
1430         avctx->i_count     = s->i_count;
1431         // FIXME f/b_count in avctx
1432         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1433         avctx->skip_count  = s->skip_count;
1434
1435         MPV_frame_end(s);
1436
1437         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1438             ff_mjpeg_encode_picture_trailer(s);
1439
1440         if (avctx->rc_buffer_size) {
1441             RateControlContext *rcc = &s->rc_context;
1442             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1443
1444             if (put_bits_count(&s->pb) > max_size &&
1445                 s->lambda < s->avctx->lmax) {
1446                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1447                                        (s->qscale + 1) / s->qscale);
1448                 if (s->adaptive_quant) {
1449                     int i;
1450                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1451                         s->lambda_table[i] =
1452                             FFMAX(s->lambda_table[i] + 1,
1453                                   s->lambda_table[i] * (s->qscale + 1) /
1454                                   s->qscale);
1455                 }
1456                 s->mb_skipped = 0;        // done in MPV_frame_start()
1457                 // done in encode_picture() so we must undo it
1458                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1459                     if (s->flipflop_rounding          ||
1460                         s->codec_id == CODEC_ID_H263P ||
1461                         s->codec_id == CODEC_ID_MPEG4)
1462                         s->no_rounding ^= 1;
1463                 }
1464                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1465                     s->time_base       = s->last_time_base;
1466                     s->last_non_b_time = s->time - s->pp_time;
1467                 }
1468                 //av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
1469                 for (i = 0; i < context_count; i++) {
1470                     PutBitContext *pb = &s->thread_context[i]->pb;
1471                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1472                 }
1473                 goto vbv_retry;
1474             }
1475
1476             assert(s->avctx->rc_max_rate);
1477         }
1478
1479         if (s->flags & CODEC_FLAG_PASS1)
1480             ff_write_pass1_stats(s);
1481
1482         for (i = 0; i < 4; i++) {
1483             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1484             avctx->error[i] += s->current_picture_ptr->f.error[i];
1485         }
1486
1487         if (s->flags & CODEC_FLAG_PASS1)
1488             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1489                    avctx->i_tex_bits + avctx->p_tex_bits ==
1490                        put_bits_count(&s->pb));
1491         flush_put_bits(&s->pb);
1492         s->frame_bits  = put_bits_count(&s->pb);
1493
1494         stuffing_count = ff_vbv_update(s, s->frame_bits);
1495         if (stuffing_count) {
1496             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1497                     stuffing_count + 50) {
1498                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1499                 return -1;
1500             }
1501
1502             switch (s->codec_id) {
1503             case CODEC_ID_MPEG1VIDEO:
1504             case CODEC_ID_MPEG2VIDEO:
1505                 while (stuffing_count--) {
1506                     put_bits(&s->pb, 8, 0);
1507                 }
1508             break;
1509             case CODEC_ID_MPEG4:
1510                 put_bits(&s->pb, 16, 0);
1511                 put_bits(&s->pb, 16, 0x1C3);
1512                 stuffing_count -= 4;
1513                 while (stuffing_count--) {
1514                     put_bits(&s->pb, 8, 0xFF);
1515                 }
1516             break;
1517             default:
1518                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1519             }
1520             flush_put_bits(&s->pb);
1521             s->frame_bits  = put_bits_count(&s->pb);
1522         }
1523
1524         /* update mpeg1/2 vbv_delay for CBR */
1525         if (s->avctx->rc_max_rate                          &&
1526             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1527             s->out_format == FMT_MPEG1                     &&
1528             90000LL * (avctx->rc_buffer_size - 1) <=
1529                 s->avctx->rc_max_rate * 0xFFFFLL) {
1530             int vbv_delay, min_delay;
1531             double inbits  = s->avctx->rc_max_rate *
1532                              av_q2d(s->avctx->time_base);
1533             int    minbits = s->frame_bits - 8 *
1534                              (s->vbv_delay_ptr - s->pb.buf - 1);
1535             double bits    = s->rc_context.buffer_index + minbits - inbits;
1536
1537             if (bits < 0)
1538                 av_log(s->avctx, AV_LOG_ERROR,
1539                        "Internal error, negative bits\n");
1540
1541             assert(s->repeat_first_field == 0);
1542
1543             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1544             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1545                         s->avctx->rc_max_rate;
1546
1547             vbv_delay = FFMAX(vbv_delay, min_delay);
1548
1549             assert(vbv_delay < 0xFFFF);
1550
1551             s->vbv_delay_ptr[0] &= 0xF8;
1552             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1553             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1554             s->vbv_delay_ptr[2] &= 0x07;
1555             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1556             avctx->vbv_delay     = vbv_delay * 300;
1557         }
1558         s->total_bits     += s->frame_bits;
1559         avctx->frame_bits  = s->frame_bits;
1560     } else {
1561         assert((put_bits_ptr(&s->pb) == s->pb.buf));
1562         s->frame_bits = 0;
1563     }
1564     assert((s->frame_bits & 7) == 0);
1565
1566     return s->frame_bits / 8;
1567 }
1568
1569 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1570                                                 int n, int threshold)
1571 {
1572     static const char tab[64] = {
1573         3, 2, 2, 1, 1, 1, 1, 1,
1574         1, 1, 1, 1, 1, 1, 1, 1,
1575         1, 1, 1, 1, 1, 1, 1, 1,
1576         0, 0, 0, 0, 0, 0, 0, 0,
1577         0, 0, 0, 0, 0, 0, 0, 0,
1578         0, 0, 0, 0, 0, 0, 0, 0,
1579         0, 0, 0, 0, 0, 0, 0, 0,
1580         0, 0, 0, 0, 0, 0, 0, 0
1581     };
1582     int score = 0;
1583     int run = 0;
1584     int i;
1585     DCTELEM *block = s->block[n];
1586     const int last_index = s->block_last_index[n];
1587     int skip_dc;
1588
1589     if (threshold < 0) {
1590         skip_dc = 0;
1591         threshold = -threshold;
1592     } else
1593         skip_dc = 1;
1594
1595     /* Are all we could set to zero already zero? */
1596     if (last_index <= skip_dc - 1)
1597         return;
1598
1599     for (i = 0; i <= last_index; i++) {
1600         const int j = s->intra_scantable.permutated[i];
1601         const int level = FFABS(block[j]);
1602         if (level == 1) {
1603             if (skip_dc && i == 0)
1604                 continue;
1605             score += tab[run];
1606             run = 0;
1607         } else if (level > 1) {
1608             return;
1609         } else {
1610             run++;
1611         }
1612     }
1613     if (score >= threshold)
1614         return;
1615     for (i = skip_dc; i <= last_index; i++) {
1616         const int j = s->intra_scantable.permutated[i];
1617         block[j] = 0;
1618     }
1619     if (block[0])
1620         s->block_last_index[n] = 0;
1621     else
1622         s->block_last_index[n] = -1;
1623 }
1624
1625 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1626                                int last_index)
1627 {
1628     int i;
1629     const int maxlevel = s->max_qcoeff;
1630     const int minlevel = s->min_qcoeff;
1631     int overflow = 0;
1632
1633     if (s->mb_intra) {
1634         i = 1; // skip clipping of intra dc
1635     } else
1636         i = 0;
1637
1638     for (; i <= last_index; i++) {
1639         const int j = s->intra_scantable.permutated[i];
1640         int level = block[j];
1641
1642         if (level > maxlevel) {
1643             level = maxlevel;
1644             overflow++;
1645         } else if (level < minlevel) {
1646             level = minlevel;
1647             overflow++;
1648         }
1649
1650         block[j] = level;
1651     }
1652
1653     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1654         av_log(s->avctx, AV_LOG_INFO,
1655                "warning, clipping %d dct coefficients to %d..%d\n",
1656                overflow, minlevel, maxlevel);
1657 }
1658
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For each pixel the (up to) 3x3 neighbourhood clipped to the block is
 * summed; the weight is 36 * sqrt(count * sum_of_squares - sum^2) / count,
 * with count the number of pixels in the clipped neighbourhood.
 *
 * @param weight output table of 64 entries, stored row by row
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two pixel rows
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;
    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int sum   = 0;
            int sqr   = 0;
            int count = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = line[nx];

                    sum += v;
                    sqr += v * v;
                    count++;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
1682
1683 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1684                                                 int motion_x, int motion_y,
1685                                                 int mb_block_height,
1686                                                 int mb_block_count)
1687 {
1688     int16_t weight[8][64];
1689     DCTELEM orig[8][64];
1690     const int mb_x = s->mb_x;
1691     const int mb_y = s->mb_y;
1692     int i;
1693     int skip_dct[8];
1694     int dct_offset = s->linesize * 8; // default for progressive frames
1695     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1696     int wrap_y, wrap_c;
1697
1698     for (i = 0; i < mb_block_count; i++)
1699         skip_dct[i] = s->skipdct;
1700
1701     if (s->adaptive_quant) {
1702         const int last_qp = s->qscale;
1703         const int mb_xy = mb_x + mb_y * s->mb_stride;
1704
1705         s->lambda = s->lambda_table[mb_xy];
1706         update_qscale(s);
1707
1708         if (!(s->flags & CODEC_FLAG_QP_RD)) {
1709             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1710             s->dquant = s->qscale - last_qp;
1711
1712             if (s->out_format == FMT_H263) {
1713                 s->dquant = av_clip(s->dquant, -2, 2);
1714
1715                 if (s->codec_id == CODEC_ID_MPEG4) {
1716                     if (!s->mb_intra) {
1717                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1718                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1719                                 s->dquant = 0;
1720                         }
1721                         if (s->mv_type == MV_TYPE_8X8)
1722                             s->dquant = 0;
1723                     }
1724                 }
1725             }
1726         }
1727         ff_set_qscale(s, last_qp + s->dquant);
1728     } else if (s->flags & CODEC_FLAG_QP_RD)
1729         ff_set_qscale(s, s->qscale + s->dquant);
1730
1731     wrap_y = s->linesize;
1732     wrap_c = s->uvlinesize;
1733     ptr_y  = s->new_picture.f.data[0] +
1734              (mb_y * 16 * wrap_y)              + mb_x * 16;
1735     ptr_cb = s->new_picture.f.data[1] +
1736              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1737     ptr_cr = s->new_picture.f.data[2] +
1738              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1739
1740     if((mb_x*16+16 > s->width || mb_y*16+16 > s->height) && s->codec_id != CODEC_ID_AMV){
1741         uint8_t *ebuf = s->edge_emu_buffer + 32;
1742         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1743                                 mb_y * 16, s->width, s->height);
1744         ptr_y = ebuf;
1745         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1746                                 mb_block_height, mb_x * 8, mb_y * 8,
1747                                 s->width >> 1, s->height >> 1);
1748         ptr_cb = ebuf + 18 * wrap_y;
1749         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1750                                 mb_block_height, mb_x * 8, mb_y * 8,
1751                                 s->width >> 1, s->height >> 1);
1752         ptr_cr = ebuf + 18 * wrap_y + 8;
1753     }
1754
1755     if (s->mb_intra) {
1756         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1757             int progressive_score, interlaced_score;
1758
1759             s->interlaced_dct = 0;
1760             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1761                                                     NULL, wrap_y, 8) +
1762                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1763                                                     NULL, wrap_y, 8) - 400;
1764
1765             if (progressive_score > 0) {
1766                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1767                                                        NULL, wrap_y * 2, 8) +
1768                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1769                                                        NULL, wrap_y * 2, 8);
1770                 if (progressive_score > interlaced_score) {
1771                     s->interlaced_dct = 1;
1772
1773                     dct_offset = wrap_y;
1774                     wrap_y <<= 1;
1775                     if (s->chroma_format == CHROMA_422)
1776                         wrap_c <<= 1;
1777                 }
1778             }
1779         }
1780
1781         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1782         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1783         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1784         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1785
1786         if (s->flags & CODEC_FLAG_GRAY) {
1787             skip_dct[4] = 1;
1788             skip_dct[5] = 1;
1789         } else {
1790             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1791             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1792             if (!s->chroma_y_shift) { /* 422 */
1793                 s->dsp.get_pixels(s->block[6],
1794                                   ptr_cb + (dct_offset >> 1), wrap_c);
1795                 s->dsp.get_pixels(s->block[7],
1796                                   ptr_cr + (dct_offset >> 1), wrap_c);
1797             }
1798         }
1799     } else {
1800         op_pixels_func (*op_pix)[4];
1801         qpel_mc_func (*op_qpix)[16];
1802         uint8_t *dest_y, *dest_cb, *dest_cr;
1803
1804         dest_y  = s->dest[0];
1805         dest_cb = s->dest[1];
1806         dest_cr = s->dest[2];
1807
1808         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1809             op_pix  = s->dsp.put_pixels_tab;
1810             op_qpix = s->dsp.put_qpel_pixels_tab;
1811         } else {
1812             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1813             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1814         }
1815
1816         if (s->mv_dir & MV_DIR_FORWARD) {
1817             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f.data,
1818                        op_pix, op_qpix);
1819             op_pix  = s->dsp.avg_pixels_tab;
1820             op_qpix = s->dsp.avg_qpel_pixels_tab;
1821         }
1822         if (s->mv_dir & MV_DIR_BACKWARD) {
1823             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f.data,
1824                        op_pix, op_qpix);
1825         }
1826
1827         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1828             int progressive_score, interlaced_score;
1829
1830             s->interlaced_dct = 0;
1831             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1832                                                     ptr_y,              wrap_y,
1833                                                     8) +
1834                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1835                                                     ptr_y + wrap_y * 8, wrap_y,
1836                                                     8) - 400;
1837
1838             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1839                 progressive_score -= 400;
1840
1841             if (progressive_score > 0) {
1842                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1843                                                        ptr_y,
1844                                                        wrap_y * 2, 8) +
1845                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1846                                                        ptr_y + wrap_y,
1847                                                        wrap_y * 2, 8);
1848
1849                 if (progressive_score > interlaced_score) {
1850                     s->interlaced_dct = 1;
1851
1852                     dct_offset = wrap_y;
1853                     wrap_y <<= 1;
1854                     if (s->chroma_format == CHROMA_422)
1855                         wrap_c <<= 1;
1856                 }
1857             }
1858         }
1859
1860         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1861         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1862         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1863                            dest_y + dct_offset, wrap_y);
1864         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1865                            dest_y + dct_offset + 8, wrap_y);
1866
1867         if (s->flags & CODEC_FLAG_GRAY) {
1868             skip_dct[4] = 1;
1869             skip_dct[5] = 1;
1870         } else {
1871             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1872             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1873             if (!s->chroma_y_shift) { /* 422 */
1874                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1875                                    dest_cb + (dct_offset >> 1), wrap_c);
1876                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1877                                    dest_cr + (dct_offset >> 1), wrap_c);
1878             }
1879         }
1880         /* pre quantization */
1881         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1882                 2 * s->qscale * s->qscale) {
1883             // FIXME optimize
1884             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1885                               wrap_y, 8) < 20 * s->qscale)
1886                 skip_dct[0] = 1;
1887             if (s->dsp.sad[1](NULL, ptr_y + 8,
1888                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1889                 skip_dct[1] = 1;
1890             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1891                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1892                 skip_dct[2] = 1;
1893             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1894                               dest_y + dct_offset + 8,
1895                               wrap_y, 8) < 20 * s->qscale)
1896                 skip_dct[3] = 1;
1897             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1898                               wrap_c, 8) < 20 * s->qscale)
1899                 skip_dct[4] = 1;
1900             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1901                               wrap_c, 8) < 20 * s->qscale)
1902                 skip_dct[5] = 1;
1903             if (!s->chroma_y_shift) { /* 422 */
1904                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1905                                   dest_cb + (dct_offset >> 1),
1906                                   wrap_c, 8) < 20 * s->qscale)
1907                     skip_dct[6] = 1;
1908                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1909                                   dest_cr + (dct_offset >> 1),
1910                                   wrap_c, 8) < 20 * s->qscale)
1911                     skip_dct[7] = 1;
1912             }
1913         }
1914     }
1915
1916     if (s->avctx->quantizer_noise_shaping) {
1917         if (!skip_dct[0])
1918             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1919         if (!skip_dct[1])
1920             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1921         if (!skip_dct[2])
1922             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1923         if (!skip_dct[3])
1924             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1925         if (!skip_dct[4])
1926             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1927         if (!skip_dct[5])
1928             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1929         if (!s->chroma_y_shift) { /* 422 */
1930             if (!skip_dct[6])
1931                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1932                                   wrap_c);
1933             if (!skip_dct[7])
1934                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1935                                   wrap_c);
1936         }
1937         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
1938     }
1939
1940     /* DCT & quantize */
1941     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1942     {
1943         for (i = 0; i < mb_block_count; i++) {
1944             if (!skip_dct[i]) {
1945                 int overflow;
1946                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1947                 // FIXME we could decide to change to quantizer instead of
1948                 // clipping
1949                 // JS: I don't think that would be a good idea it could lower
1950                 //     quality instead of improve it. Just INTRADC clipping
1951                 //     deserves changes in quantizer
1952                 if (overflow)
1953                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
1954             } else
1955                 s->block_last_index[i] = -1;
1956         }
1957         if (s->avctx->quantizer_noise_shaping) {
1958             for (i = 0; i < mb_block_count; i++) {
1959                 if (!skip_dct[i]) {
1960                     s->block_last_index[i] =
1961                         dct_quantize_refine(s, s->block[i], weight[i],
1962                                             orig[i], i, s->qscale);
1963                 }
1964             }
1965         }
1966
1967         if (s->luma_elim_threshold && !s->mb_intra)
1968             for (i = 0; i < 4; i++)
1969                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
1970         if (s->chroma_elim_threshold && !s->mb_intra)
1971             for (i = 4; i < mb_block_count; i++)
1972                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
1973
1974         if (s->flags & CODEC_FLAG_CBP_RD) {
1975             for (i = 0; i < mb_block_count; i++) {
1976                 if (s->block_last_index[i] == -1)
1977                     s->coded_score[i] = INT_MAX / 256;
1978             }
1979         }
1980     }
1981
1982     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
1983         s->block_last_index[4] =
1984         s->block_last_index[5] = 0;
1985         s->block[4][0] =
1986         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
1987     }
1988
1989     // non c quantize code returns incorrect block_last_index FIXME
1990     if (s->alternate_scan && s->dct_quantize != dct_quantize_c) {
1991         for (i = 0; i < mb_block_count; i++) {
1992             int j;
1993             if (s->block_last_index[i] > 0) {
1994                 for (j = 63; j > 0; j--) {
1995                     if (s->block[i][s->intra_scantable.permutated[j]])
1996                         break;
1997                 }
1998                 s->block_last_index[i] = j;
1999             }
2000         }
2001     }
2002
2003     /* huffman encode */
2004     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2005     case CODEC_ID_MPEG1VIDEO:
2006     case CODEC_ID_MPEG2VIDEO:
2007         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2008             mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2009         break;
2010     case CODEC_ID_MPEG4:
2011         if (CONFIG_MPEG4_ENCODER)
2012             mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2013         break;
2014     case CODEC_ID_MSMPEG4V2:
2015     case CODEC_ID_MSMPEG4V3:
2016     case CODEC_ID_WMV1:
2017         if (CONFIG_MSMPEG4_ENCODER)
2018             msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2019         break;
2020     case CODEC_ID_WMV2:
2021         if (CONFIG_WMV2_ENCODER)
2022             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2023         break;
2024     case CODEC_ID_H261:
2025         if (CONFIG_H261_ENCODER)
2026             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2027         break;
2028     case CODEC_ID_H263:
2029     case CODEC_ID_H263P:
2030     case CODEC_ID_FLV1:
2031     case CODEC_ID_RV10:
2032     case CODEC_ID_RV20:
2033         if (CONFIG_H263_ENCODER)
2034             h263_encode_mb(s, s->block, motion_x, motion_y);
2035         break;
2036     case CODEC_ID_MJPEG:
2037     case CODEC_ID_AMV:
2038         if (CONFIG_MJPEG_ENCODER)
2039             ff_mjpeg_encode_mb(s, s->block);
2040         break;
2041     default:
2042         assert(0);
2043     }
2044 }
2045
2046 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2047 {
2048     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2049     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2050 }
2051
2052 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2053     int i;
2054
2055     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2056
2057     /* mpeg1 */
2058     d->mb_skip_run= s->mb_skip_run;
2059     for(i=0; i<3; i++)
2060         d->last_dc[i] = s->last_dc[i];
2061
2062     /* statistics */
2063     d->mv_bits= s->mv_bits;
2064     d->i_tex_bits= s->i_tex_bits;
2065     d->p_tex_bits= s->p_tex_bits;
2066     d->i_count= s->i_count;
2067     d->f_count= s->f_count;
2068     d->b_count= s->b_count;
2069     d->skip_count= s->skip_count;
2070     d->misc_bits= s->misc_bits;
2071     d->last_bits= 0;
2072
2073     d->mb_skipped= 0;
2074     d->qscale= s->qscale;
2075     d->dquant= s->dquant;
2076
2077     d->esc3_level_length= s->esc3_level_length;
2078 }
2079
2080 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2081     int i;
2082
2083     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2084     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2085
2086     /* mpeg1 */
2087     d->mb_skip_run= s->mb_skip_run;
2088     for(i=0; i<3; i++)
2089         d->last_dc[i] = s->last_dc[i];
2090
2091     /* statistics */
2092     d->mv_bits= s->mv_bits;
2093     d->i_tex_bits= s->i_tex_bits;
2094     d->p_tex_bits= s->p_tex_bits;
2095     d->i_count= s->i_count;
2096     d->f_count= s->f_count;
2097     d->b_count= s->b_count;
2098     d->skip_count= s->skip_count;
2099     d->misc_bits= s->misc_bits;
2100
2101     d->mb_intra= s->mb_intra;
2102     d->mb_skipped= s->mb_skipped;
2103     d->mv_type= s->mv_type;
2104     d->mv_dir= s->mv_dir;
2105     d->pb= s->pb;
2106     if(s->data_partitioning){
2107         d->pb2= s->pb2;
2108         d->tex_pb= s->tex_pb;
2109     }
2110     d->block= s->block;
2111     for(i=0; i<8; i++)
2112         d->block_last_index[i]= s->block_last_index[i];
2113     d->interlaced_dct= s->interlaced_dct;
2114     d->qscale= s->qscale;
2115
2116     d->esc3_level_length= s->esc3_level_length;
2117 }
2118
2119 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2120                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2121                            int *dmin, int *next_block, int motion_x, int motion_y)
2122 {
2123     int score;
2124     uint8_t *dest_backup[3];
2125
2126     copy_context_before_encode(s, backup, type);
2127
2128     s->block= s->blocks[*next_block];
2129     s->pb= pb[*next_block];
2130     if(s->data_partitioning){
2131         s->pb2   = pb2   [*next_block];
2132         s->tex_pb= tex_pb[*next_block];
2133     }
2134
2135     if(*next_block){
2136         memcpy(dest_backup, s->dest, sizeof(s->dest));
2137         s->dest[0] = s->rd_scratchpad;
2138         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2139         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2140         assert(s->linesize >= 32); //FIXME
2141     }
2142
2143     encode_mb(s, motion_x, motion_y);
2144
2145     score= put_bits_count(&s->pb);
2146     if(s->data_partitioning){
2147         score+= put_bits_count(&s->pb2);
2148         score+= put_bits_count(&s->tex_pb);
2149     }
2150
2151     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2152         MPV_decode_mb(s, s->block);
2153
2154         score *= s->lambda2;
2155         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2156     }
2157
2158     if(*next_block){
2159         memcpy(s->dest, dest_backup, sizeof(s->dest));
2160     }
2161
2162     if(score<*dmin){
2163         *dmin= score;
2164         *next_block^=1;
2165
2166         copy_context_after_encode(best, s, type);
2167     }
2168 }
2169
2170 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2171     uint32_t *sq = ff_squareTbl + 256;
2172     int acc=0;
2173     int x,y;
2174
2175     if(w==16 && h==16)
2176         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2177     else if(w==8 && h==8)
2178         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2179
2180     for(y=0; y<h; y++){
2181         for(x=0; x<w; x++){
2182             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2183         }
2184     }
2185
2186     assert(acc>=0);
2187
2188     return acc;
2189 }
2190
2191 static int sse_mb(MpegEncContext *s){
2192     int w= 16;
2193     int h= 16;
2194
2195     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2196     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2197
2198     if(w==16 && h==16)
2199       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2200         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2201                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2202                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2203       }else{
2204         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2205                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2206                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2207       }
2208     else
2209         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2210                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2211                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2212 }
2213
2214 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2215     MpegEncContext *s= *(void**)arg;
2216
2217
2218     s->me.pre_pass=1;
2219     s->me.dia_size= s->avctx->pre_dia_size;
2220     s->first_slice_line=1;
2221     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2222         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2223             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2224         }
2225         s->first_slice_line=0;
2226     }
2227
2228     s->me.pre_pass=0;
2229
2230     return 0;
2231 }
2232
2233 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2234     MpegEncContext *s= *(void**)arg;
2235
2236     ff_check_alignment();
2237
2238     s->me.dia_size= s->avctx->dia_size;
2239     s->first_slice_line=1;
2240     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2241         s->mb_x=0; //for block init below
2242         ff_init_block_index(s);
2243         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2244             s->block_index[0]+=2;
2245             s->block_index[1]+=2;
2246             s->block_index[2]+=2;
2247             s->block_index[3]+=2;
2248
2249             /* compute motion vector & mb_type and store in context */
2250             if(s->pict_type==AV_PICTURE_TYPE_B)
2251                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2252             else
2253                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2254         }
2255         s->first_slice_line=0;
2256     }
2257     return 0;
2258 }
2259
2260 static int mb_var_thread(AVCodecContext *c, void *arg){
2261     MpegEncContext *s= *(void**)arg;
2262     int mb_x, mb_y;
2263
2264     ff_check_alignment();
2265
2266     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2267         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2268             int xx = mb_x * 16;
2269             int yy = mb_y * 16;
2270             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2271             int varc;
2272             int sum = s->dsp.pix_sum(pix, s->linesize);
2273
2274             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2275
2276             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2277             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2278             s->me.mb_var_sum_temp    += varc;
2279         }
2280     }
2281     return 0;
2282 }
2283
2284 static void write_slice_end(MpegEncContext *s){
2285     if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4){
2286         if(s->partitioned_frame){
2287             ff_mpeg4_merge_partitions(s);
2288         }
2289
2290         ff_mpeg4_stuffing(&s->pb);
2291     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2292         ff_mjpeg_encode_stuffing(&s->pb);
2293     }
2294
2295     avpriv_align_put_bits(&s->pb);
2296     flush_put_bits(&s->pb);
2297
2298     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2299         s->misc_bits+= get_bits_diff(s);
2300 }
2301
2302 static int encode_thread(AVCodecContext *c, void *arg){
2303     MpegEncContext *s= *(void**)arg;
2304     int mb_x, mb_y, pdif = 0;
2305     int chr_h= 16>>s->chroma_y_shift;
2306     int i, j;
2307     MpegEncContext best_s, backup_s;
2308     uint8_t bit_buf[2][MAX_MB_BYTES];
2309     uint8_t bit_buf2[2][MAX_MB_BYTES];
2310     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2311     PutBitContext pb[2], pb2[2], tex_pb[2];
2312 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2313
2314     ff_check_alignment();
2315
2316     for(i=0; i<2; i++){
2317         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2318         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2319         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2320     }
2321
2322     s->last_bits= put_bits_count(&s->pb);
2323     s->mv_bits=0;
2324     s->misc_bits=0;
2325     s->i_tex_bits=0;
2326     s->p_tex_bits=0;
2327     s->i_count=0;
2328     s->f_count=0;
2329     s->b_count=0;
2330     s->skip_count=0;
2331
2332     for(i=0; i<3; i++){
2333         /* init last dc values */
2334         /* note: quant matrix value (8) is implied here */
2335         s->last_dc[i] = 128 << s->intra_dc_precision;
2336
2337         s->current_picture.f.error[i] = 0;
2338     }
2339     if(s->codec_id==CODEC_ID_AMV){
2340         s->last_dc[0] = 128*8/13;
2341         s->last_dc[1] = 128*8/14;
2342         s->last_dc[2] = 128*8/14;
2343     }
2344     s->mb_skip_run = 0;
2345     memset(s->last_mv, 0, sizeof(s->last_mv));
2346
2347     s->last_mv_dir = 0;
2348
2349     switch(s->codec_id){
2350     case CODEC_ID_H263:
2351     case CODEC_ID_H263P:
2352     case CODEC_ID_FLV1:
2353         if (CONFIG_H263_ENCODER)
2354             s->gob_index = ff_h263_get_gob_height(s);
2355         break;
2356     case CODEC_ID_MPEG4:
2357         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2358             ff_mpeg4_init_partitions(s);
2359         break;
2360     }
2361
2362     s->resync_mb_x=0;
2363     s->resync_mb_y=0;
2364     s->first_slice_line = 1;
2365     s->ptr_lastgob = s->pb.buf;
2366     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2367 //    printf("row %d at %X\n", s->mb_y, (int)s);
2368         s->mb_x=0;
2369         s->mb_y= mb_y;
2370
2371         ff_set_qscale(s, s->qscale);
2372         ff_init_block_index(s);
2373
2374         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2375             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2376             int mb_type= s->mb_type[xy];
2377 //            int d;
2378             int dmin= INT_MAX;
2379             int dir;
2380
2381             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2382                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2383                 return -1;
2384             }
2385             if(s->data_partitioning){
2386                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2387                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2388                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2389                     return -1;
2390                 }
2391             }
2392
2393             s->mb_x = mb_x;
2394             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2395             ff_update_block_index(s);
2396
2397             if(CONFIG_H261_ENCODER && s->codec_id == CODEC_ID_H261){
2398                 ff_h261_reorder_mb_index(s);
2399                 xy= s->mb_y*s->mb_stride + s->mb_x;
2400                 mb_type= s->mb_type[xy];
2401             }
2402
2403             /* write gob / video packet header  */
2404             if(s->rtp_mode){
2405                 int current_packet_size, is_gob_start;
2406
2407                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2408
2409                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2410
2411                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2412
2413                 switch(s->codec_id){
2414                 case CODEC_ID_H263:
2415                 case CODEC_ID_H263P:
2416                     if(!s->h263_slice_structured)
2417                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2418                     break;
2419                 case CODEC_ID_MPEG2VIDEO:
2420                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2421                 case CODEC_ID_MPEG1VIDEO:
2422                     if(s->mb_skip_run) is_gob_start=0;
2423                     break;
2424                 }
2425
2426                 if(is_gob_start){
2427                     if(s->start_mb_y != mb_y || mb_x!=0){
2428                         write_slice_end(s);
2429
2430                         if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
2431                             ff_mpeg4_init_partitions(s);
2432                         }
2433                     }
2434
2435                     assert((put_bits_count(&s->pb)&7) == 0);
2436                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2437
2438                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2439                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2440                         int d= 100 / s->avctx->error_rate;
2441                         if(r % d == 0){
2442                             current_packet_size=0;
2443                             s->pb.buf_ptr= s->ptr_lastgob;
2444                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2445                         }
2446                     }
2447
2448                     if (s->avctx->rtp_callback){
2449                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2450                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2451                     }
2452
2453                     switch(s->codec_id){
2454                     case CODEC_ID_MPEG4:
2455                         if (CONFIG_MPEG4_ENCODER) {
2456                             ff_mpeg4_encode_video_packet_header(s);
2457                             ff_mpeg4_clean_buffers(s);
2458                         }
2459                     break;
2460                     case CODEC_ID_MPEG1VIDEO:
2461                     case CODEC_ID_MPEG2VIDEO:
2462                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2463                             ff_mpeg1_encode_slice_header(s);
2464                             ff_mpeg1_clean_buffers(s);
2465                         }
2466                     break;
2467                     case CODEC_ID_H263:
2468                     case CODEC_ID_H263P:
2469                         if (CONFIG_H263_ENCODER)
2470                             h263_encode_gob_header(s, mb_y);
2471                     break;
2472                     }
2473
2474                     if(s->flags&CODEC_FLAG_PASS1){
2475                         int bits= put_bits_count(&s->pb);
2476                         s->misc_bits+= bits - s->last_bits;
2477                         s->last_bits= bits;
2478                     }
2479
2480                     s->ptr_lastgob += current_packet_size;
2481                     s->first_slice_line=1;
2482                     s->resync_mb_x=mb_x;
2483                     s->resync_mb_y=mb_y;
2484                 }
2485             }
2486
2487             if(  (s->resync_mb_x   == s->mb_x)
2488                && s->resync_mb_y+1 == s->mb_y){
2489                 s->first_slice_line=0;
2490             }
2491
2492             s->mb_skipped=0;
2493             s->dquant=0; //only for QP_RD
2494
2495             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
2496                 int next_block=0;
2497                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2498
2499                 copy_context_before_encode(&backup_s, s, -1);
2500                 backup_s.pb= s->pb;
2501                 best_s.data_partitioning= s->data_partitioning;
2502                 best_s.partitioned_frame= s->partitioned_frame;
2503                 if(s->data_partitioning){
2504                     backup_s.pb2= s->pb2;
2505                     backup_s.tex_pb= s->tex_pb;
2506                 }
2507
2508                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2509                     s->mv_dir = MV_DIR_FORWARD;
2510                     s->mv_type = MV_TYPE_16X16;
2511                     s->mb_intra= 0;
2512                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2513                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2514                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2515                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2516                 }
2517                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2518                     s->mv_dir = MV_DIR_FORWARD;
2519                     s->mv_type = MV_TYPE_FIELD;
2520                     s->mb_intra= 0;
2521                     for(i=0; i<2; i++){
2522                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2523                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2524                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2525                     }
2526                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2527                                  &dmin, &next_block, 0, 0);
2528                 }
2529                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2530                     s->mv_dir = MV_DIR_FORWARD;
2531                     s->mv_type = MV_TYPE_16X16;
2532                     s->mb_intra= 0;
2533                     s->mv[0][0][0] = 0;
2534                     s->mv[0][0][1] = 0;
2535                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2536                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2537                 }
2538                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2539                     s->mv_dir = MV_DIR_FORWARD;
2540                     s->mv_type = MV_TYPE_8X8;
2541                     s->mb_intra= 0;
2542                     for(i=0; i<4; i++){
2543                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2544                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2545                     }
2546                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2547                                  &dmin, &next_block, 0, 0);
2548                 }
2549                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2550                     s->mv_dir = MV_DIR_FORWARD;
2551                     s->mv_type = MV_TYPE_16X16;
2552                     s->mb_intra= 0;
2553                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2554                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2555                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2556                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2557                 }
2558                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2559                     s->mv_dir = MV_DIR_BACKWARD;
2560                     s->mv_type = MV_TYPE_16X16;
2561                     s->mb_intra= 0;
2562                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2563                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2564                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2565                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2566                 }
2567                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2568                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2569                     s->mv_type = MV_TYPE_16X16;
2570                     s->mb_intra= 0;
2571                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2572                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2573                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2574                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2575                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2576                                  &dmin, &next_block, 0, 0);
2577                 }
2578                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2579                     s->mv_dir = MV_DIR_FORWARD;
2580                     s->mv_type = MV_TYPE_FIELD;
2581                     s->mb_intra= 0;
2582                     for(i=0; i<2; i++){
2583                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2584                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2585                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2586                     }
2587                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2588                                  &dmin, &next_block, 0, 0);
2589                 }
2590                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2591                     s->mv_dir = MV_DIR_BACKWARD;
2592                     s->mv_type = MV_TYPE_FIELD;
2593                     s->mb_intra= 0;
2594                     for(i=0; i<2; i++){
2595                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2596                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2597                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2598                     }
2599                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2600                                  &dmin, &next_block, 0, 0);
2601                 }
2602                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2603                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2604                     s->mv_type = MV_TYPE_FIELD;
2605                     s->mb_intra= 0;
2606                     for(dir=0; dir<2; dir++){
2607                         for(i=0; i<2; i++){
2608                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2609                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2610                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2611                         }
2612                     }
2613                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2614                                  &dmin, &next_block, 0, 0);
2615                 }
2616                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2617                     s->mv_dir = 0;
2618                     s->mv_type = MV_TYPE_16X16;
2619                     s->mb_intra= 1;
2620                     s->mv[0][0][0] = 0;
2621                     s->mv[0][0][1] = 0;
2622                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2623                                  &dmin, &next_block, 0, 0);
2624                     if(s->h263_pred || s->h263_aic){
2625                         if(best_s.mb_intra)
2626                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2627                         else
2628                             ff_clean_intra_table_entries(s); //old mode?
2629                     }
2630                 }
2631
2632                 if((s->flags & CODEC_FLAG_QP_RD) && dmin < INT_MAX){
2633                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2634                         const int last_qp= backup_s.qscale;
2635                         int qpi, qp, dc[6];
2636                         DCTELEM ac[6][16];
2637                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2638                         static const int dquant_tab[4]={-1,1,-2,2};
2639
2640                         assert(backup_s.dquant == 0);
2641
2642                         //FIXME intra
2643                         s->mv_dir= best_s.mv_dir;
2644                         s->mv_type = MV_TYPE_16X16;
2645                         s->mb_intra= best_s.mb_intra;
2646                         s->mv[0][0][0] = best_s.mv[0][0][0];
2647                         s->mv[0][0][1] = best_s.mv[0][0][1];
2648                         s->mv[1][0][0] = best_s.mv[1][0][0];
2649                         s->mv[1][0][1] = best_s.mv[1][0][1];
2650
2651                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2652                         for(; qpi<4; qpi++){
2653                             int dquant= dquant_tab[qpi];
2654                             qp= last_qp + dquant;
2655                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2656                                 continue;
2657                             backup_s.dquant= dquant;
2658                             if(s->mb_intra && s->dc_val[0]){
2659                                 for(i=0; i<6; i++){
2660                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2661                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2662                                 }
2663                             }
2664
2665                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2666                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2667                             if(best_s.qscale != qp){
2668                                 if(s->mb_intra && s->dc_val[0]){
2669                                     for(i=0; i<6; i++){
2670                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2671                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2672                                     }
2673                                 }
2674                             }
2675                         }
2676                     }
2677                 }
2678                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2679                     int mx= s->b_direct_mv_table[xy][0];
2680                     int my= s->b_direct_mv_table[xy][1];
2681
2682                     backup_s.dquant = 0;
2683                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2684                     s->mb_intra= 0;
2685                     ff_mpeg4_set_direct_mv(s, mx, my);
2686                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2687                                  &dmin, &next_block, mx, my);
2688                 }
2689                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2690                     backup_s.dquant = 0;
2691                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2692                     s->mb_intra= 0;
2693                     ff_mpeg4_set_direct_mv(s, 0, 0);
2694                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2695                                  &dmin, &next_block, 0, 0);
2696                 }
2697                 if(!best_s.mb_intra && s->flags2&CODEC_FLAG2_SKIP_RD){
2698                     int coded=0;
2699                     for(i=0; i<6; i++)
2700                         coded |= s->block_last_index[i];
2701                     if(coded){
2702                         int mx,my;
2703                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2704                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2705                             mx=my=0; //FIXME find the one we actually used
2706                             ff_mpeg4_set_direct_mv(s, mx, my);
2707                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2708                             mx= s->mv[1][0][0];
2709                             my= s->mv[1][0][1];
2710                         }else{
2711                             mx= s->mv[0][0][0];
2712                             my= s->mv[0][0][1];
2713                         }
2714
2715                         s->mv_dir= best_s.mv_dir;
2716                         s->mv_type = best_s.mv_type;
2717                         s->mb_intra= 0;
2718 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2719                         s->mv[0][0][1] = best_s.mv[0][0][1];
2720                         s->mv[1][0][0] = best_s.mv[1][0][0];
2721                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2722                         backup_s.dquant= 0;
2723                         s->skipdct=1;
2724                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2725                                         &dmin, &next_block, mx, my);
2726                         s->skipdct=0;
2727                     }
2728                 }
2729
2730                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2731
2732                 copy_context_after_encode(s, &best_s, -1);
2733
2734                 pb_bits_count= put_bits_count(&s->pb);
2735                 flush_put_bits(&s->pb);
2736                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2737                 s->pb= backup_s.pb;
2738
2739                 if(s->data_partitioning){
2740                     pb2_bits_count= put_bits_count(&s->pb2);
2741                     flush_put_bits(&s->pb2);
2742                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2743                     s->pb2= backup_s.pb2;
2744
2745                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2746                     flush_put_bits(&s->tex_pb);
2747                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2748                     s->tex_pb= backup_s.tex_pb;
2749                 }
2750                 s->last_bits= put_bits_count(&s->pb);
2751
2752                 if (CONFIG_H263_ENCODER &&
2753                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2754                     ff_h263_update_motion_val(s);
2755
2756                 if(next_block==0){ //FIXME 16 vs linesize16
2757                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2758                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2759                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2760                 }
2761
2762                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2763                     MPV_decode_mb(s, s->block);
2764             } else {
2765                 int motion_x = 0, motion_y = 0;
2766                 s->mv_type=MV_TYPE_16X16;
2767                 // only one MB-Type possible
2768
2769                 switch(mb_type){
2770                 case CANDIDATE_MB_TYPE_INTRA:
2771                     s->mv_dir = 0;
2772                     s->mb_intra= 1;
2773                     motion_x= s->mv[0][0][0] = 0;
2774                     motion_y= s->mv[0][0][1] = 0;
2775                     break;
2776                 case CANDIDATE_MB_TYPE_INTER:
2777                     s->mv_dir = MV_DIR_FORWARD;
2778                     s->mb_intra= 0;
2779                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2780                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2781                     break;
2782                 case CANDIDATE_MB_TYPE_INTER_I:
2783                     s->mv_dir = MV_DIR_FORWARD;
2784                     s->mv_type = MV_TYPE_FIELD;
2785                     s->mb_intra= 0;
2786                     for(i=0; i<2; i++){
2787                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2788                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2789                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2790                     }
2791                     break;
2792                 case CANDIDATE_MB_TYPE_INTER4V:
2793                     s->mv_dir = MV_DIR_FORWARD;
2794                     s->mv_type = MV_TYPE_8X8;
2795                     s->mb_intra= 0;
2796                     for(i=0; i<4; i++){
2797                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2798                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2799                     }
2800                     break;
2801                 case CANDIDATE_MB_TYPE_DIRECT:
2802                     if (CONFIG_MPEG4_ENCODER) {
2803                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2804                         s->mb_intra= 0;
2805                         motion_x=s->b_direct_mv_table[xy][0];
2806                         motion_y=s->b_direct_mv_table[xy][1];
2807                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2808                     }
2809                     break;
2810                 case CANDIDATE_MB_TYPE_DIRECT0:
2811                     if (CONFIG_MPEG4_ENCODER) {
2812                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2813                         s->mb_intra= 0;
2814                         ff_mpeg4_set_direct_mv(s, 0, 0);
2815                     }
2816                     break;
2817                 case CANDIDATE_MB_TYPE_BIDIR:
2818                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2819                     s->mb_intra= 0;
2820                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2821                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2822                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2823                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2824                     break;
2825                 case CANDIDATE_MB_TYPE_BACKWARD:
2826                     s->mv_dir = MV_DIR_BACKWARD;
2827                     s->mb_intra= 0;
2828                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2829                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2830                     break;
2831                 case CANDIDATE_MB_TYPE_FORWARD:
2832                     s->mv_dir = MV_DIR_FORWARD;
2833                     s->mb_intra= 0;
2834                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2835                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2836 //                    printf(" %d %d ", motion_x, motion_y);
2837                     break;
2838                 case CANDIDATE_MB_TYPE_FORWARD_I:
2839                     s->mv_dir = MV_DIR_FORWARD;
2840                     s->mv_type = MV_TYPE_FIELD;
2841                     s->mb_intra= 0;
2842                     for(i=0; i<2; i++){
2843                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2844                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2845                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2846                     }
2847                     break;
2848                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2849                     s->mv_dir = MV_DIR_BACKWARD;
2850                     s->mv_type = MV_TYPE_FIELD;
2851                     s->mb_intra= 0;
2852                     for(i=0; i<2; i++){
2853                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2854                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2855                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2856                     }
2857                     break;
2858                 case CANDIDATE_MB_TYPE_BIDIR_I:
2859                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2860                     s->mv_type = MV_TYPE_FIELD;
2861                     s->mb_intra= 0;
2862                     for(dir=0; dir<2; dir++){
2863                         for(i=0; i<2; i++){
2864                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2865                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2866                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2867                         }
2868                     }
2869                     break;
2870                 default:
2871                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2872                 }
2873
2874                 encode_mb(s, motion_x, motion_y);
2875
2876                 // RAL: Update last macroblock type
2877                 s->last_mv_dir = s->mv_dir;
2878
2879                 if (CONFIG_H263_ENCODER &&
2880                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2881                     ff_h263_update_motion_val(s);
2882
2883                 MPV_decode_mb(s, s->block);
2884             }
2885
2886             /* clean the MV table in IPS frames for direct mode in B frames */
2887             if(s->mb_intra /* && I,P,S_TYPE */){
2888                 s->p_mv_table[xy][0]=0;
2889                 s->p_mv_table[xy][1]=0;
2890             }
2891
2892             if(s->flags&CODEC_FLAG_PSNR){
2893                 int w= 16;
2894                 int h= 16;
2895
2896                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2897                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2898
2899                 s->current_picture.f.error[0] += sse(
2900                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2901                     s->dest[0], w, h, s->linesize);
2902                 s->current_picture.f.error[1] += sse(
2903                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2904                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2905                 s->current_picture.f.error[2] += sse(
2906                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2907                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2908             }
2909             if(s->loop_filter){
2910                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2911                     ff_h263_loop_filter(s);
2912             }
2913 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
2914         }
2915     }
2916
2917     //not beautiful here but we must write it before flushing so it has to be here
2918     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
2919         msmpeg4_encode_ext_header(s);
2920
2921     write_slice_end(s);
2922
2923     /* Send the last GOB if RTP */
2924     if (s->avctx->rtp_callback) {
2925         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
2926         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
2927         /* Call the RTP callback to send the last GOB */
2928         emms_c();
2929         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
2930     }
2931
2932     return 0;
2933 }
2934
2935 #define MERGE(field) dst->field += src->field; src->field=0
2936 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
2937     MERGE(me.scene_change_score);
2938     MERGE(me.mc_mb_var_sum_temp);
2939     MERGE(me.mb_var_sum_temp);
2940 }
2941
2942 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
2943     int i;
2944
2945     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
2946     MERGE(dct_count[1]);
2947     MERGE(mv_bits);
2948     MERGE(i_tex_bits);
2949     MERGE(p_tex_bits);
2950     MERGE(i_count);
2951     MERGE(f_count);
2952     MERGE(b_count);
2953     MERGE(skip_count);
2954     MERGE(misc_bits);
2955     MERGE(error_count);
2956     MERGE(padding_bug_score);
2957     MERGE(current_picture.f.error[0]);
2958     MERGE(current_picture.f.error[1]);
2959     MERGE(current_picture.f.error[2]);
2960
2961     if(dst->avctx->noise_reduction){
2962         for(i=0; i<64; i++){
2963             MERGE(dct_error_sum[0][i]);
2964             MERGE(dct_error_sum[1][i]);
2965         }
2966     }
2967
2968     assert(put_bits_count(&src->pb) % 8 ==0);
2969     assert(put_bits_count(&dst->pb) % 8 ==0);
2970     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
2971     flush_put_bits(&dst->pb);
2972 }
2973
2974 static int estimate_qp(MpegEncContext *s, int dry_run){
2975     if (s->next_lambda){
2976         s->current_picture_ptr->f.quality =
2977         s->current_picture.f.quality = s->next_lambda;
2978         if(!dry_run) s->next_lambda= 0;
2979     } else if (!s->fixed_qscale) {
2980         s->current_picture_ptr->f.quality =
2981         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
2982         if (s->current_picture.f.quality < 0)
2983             return -1;
2984     }
2985
2986     if(s->adaptive_quant){
2987         switch(s->codec_id){
2988         case CODEC_ID_MPEG4:
2989             if (CONFIG_MPEG4_ENCODER)
2990                 ff_clean_mpeg4_qscales(s);
2991             break;
2992         case CODEC_ID_H263:
2993         case CODEC_ID_H263P:
2994         case CODEC_ID_FLV1:
2995             if (CONFIG_H263_ENCODER)
2996                 ff_clean_h263_qscales(s);
2997             break;
2998         default:
2999             ff_init_qscale_tab(s);
3000         }
3001
3002         s->lambda= s->lambda_table[0];
3003         //FIXME broken
3004     }else
3005         s->lambda = s->current_picture.f.quality;
3006 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3007     update_qscale(s);
3008     return 0;
3009 }
3010
3011 /* must be called before writing the header */
3012 static void set_frame_distances(MpegEncContext * s){
3013     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3014     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3015
3016     if(s->pict_type==AV_PICTURE_TYPE_B){
3017         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3018         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3019     }else{
3020         s->pp_time= s->time - s->last_non_b_time;
3021         s->last_non_b_time= s->time;
3022         assert(s->picture_number==0 || s->pp_time > 0);
3023     }
3024 }
3025
/**
 * Encode one picture.
 *
 * Visible steps, in order: set up timing and rounding state, pick a
 * preliminary lambda, run motion estimation (or, for I pictures, mark every
 * macroblock as intra and optionally run mb_var_thread), merge the
 * per-context ME statistics, possibly re-type a P picture as I on a scene
 * change, choose f_code/b_code and fix over-long vectors, run estimate_qp(),
 * prepare the MJPEG/AMV quantization matrices, write the codec-specific
 * picture header and finally run encode_thread on every slice context and
 * merge the results.
 *
 * @param s              encoder context; updated in place
 * @param picture_number stored in s->picture_number and forwarded to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() reports failure
 */
3026 static int encode_picture(MpegEncContext *s, int picture_number)
3027 {
3028     int i;
3029     int bits;
3030     int context_count = s->slice_context_count;
3031
3032     s->picture_number = picture_number;
3033
3034     /* Reset the average MB variance */
3035     s->me.mb_var_sum_temp    =
3036     s->me.mc_mb_var_sum_temp = 0;
3037
3038     /* we need to initialize some time vars before we can encode b-frames */
3039     // RAL: Condition added for MPEG1VIDEO
3040     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3041         set_frame_distances(s);
3042     if(CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4)
3043         ff_set_mpeg4_time(s);
3044
3045     s->me.scene_change_score=0;
3046
3047 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3048
         /* Rounding mode: I pictures reset no_rounding (1 for msmpeg4_version >= 3,
          * else 0); other non-B pictures toggle it when flipflop_rounding is set
          * or the codec is H263P or MPEG4. B pictures leave it untouched. */
3049     if(s->pict_type==AV_PICTURE_TYPE_I){
3050         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3051         else                        s->no_rounding=0;
3052     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3053         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
3054             s->no_rounding ^= 1;
3055     }
3056
         /* Preliminary lambda for motion estimation: with CODEC_FLAG_PASS2 a
          * dry-run estimate_qp(); otherwise, unless CODEC_FLAG_QSCALE is set,
          * the lambda remembered in last_lambda_for[] for the matching picture
          * type. The final value is chosen by estimate_qp(s, 0) further down. */
3057     if(s->flags & CODEC_FLAG_PASS2){
3058         if (estimate_qp(s,1) < 0)
3059             return -1;
3060         ff_get_2pass_fcode(s);
3061     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3062         if(s->pict_type==AV_PICTURE_TYPE_B)
3063             s->lambda= s->last_lambda_for[s->pict_type];
3064         else
3065             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3066         update_qscale(s);
3067     }
3068
         /* Except for AMV the chroma intra quantizer tables alias the luma ones;
          * a chroma table is freed first only if it is not already that alias. */
3069     if(s->codec_id != CODEC_ID_AMV){
3070         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3071         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3072         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3073         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3074     }
3075
3076     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* Refresh the other slice contexts (1 .. context_count-1) from s. */
3077     for(i=1; i<context_count; i++){
3078         ff_update_duplicate_context(s->thread_context[i], s);
3079     }
3080
3081     if(ff_init_me(s)<0)
3082         return -1;
3083
3084     /* Estimate motion for every MB */
3085     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3086         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3087         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
             /* Optional pre-pass: non-B pictures with me_threshold == 0 only, when
              * pre_me == 2, or pre_me is set and the last non-B picture was an I. */
3088         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3089             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3090                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3091             }
3092         }
3093
3094         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3095     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3096         /* I-Frame */
3097         for(i=0; i<s->mb_stride*s->mb_height; i++)
3098             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3099
3100         if(!s->fixed_qscale){
3101             /* finding spatial complexity for I-frame rate control */
3102             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3103         }
3104     }
         /* Sum the ME statistics of the other slice contexts into s and publish
          * the variance sums on both picture structures. */
3105     for(i=1; i<context_count; i++){
3106         merge_context_after_me(s, s->thread_context[i]);
3107     }
3108     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3109     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3110     emms_c();
3111
         /* Scene change: a P picture whose score exceeds the threshold is
          * re-typed as I and all its macroblocks are marked intra. */
3112     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3113         s->pict_type= AV_PICTURE_TYPE_I;
3114         for(i=0; i<s->mb_stride*s->mb_height; i++)
3115             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3116 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3117     }
3118
         /* Choose f_code (and b_code for B pictures) from the estimated vectors
          * and let ff_fix_long_mvs() handle vectors that do not fit. Skipped
          * entirely when umvplus is set. */
3119     if(!s->umvplus){
3120         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3121             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3122
3123             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3124                 int a,b;
3125                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3126                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3127                 s->f_code= FFMAX3(s->f_code, a, b);
3128             }
3129
3130             ff_fix_long_p_mvs(s);
3131             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3132             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3133                 int j;
3134                 for(i=0; i<2; i++){
3135                     for(j=0; j<2; j++)
3136                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3137                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3138                 }
3139             }
3140         }
3141
3142         if(s->pict_type==AV_PICTURE_TYPE_B){
3143             int a, b;
3144
                 /* f_code covers the forward tables, b_code the backward ones;
                  * each is the larger of the plain and the bidirectional result */
3145             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3146             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3147             s->f_code = FFMAX(a, b);
3148
3149             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3150             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3151             s->b_code = FFMAX(a, b);
3152
3153             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3154             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3155             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3156             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3157             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3158                 int dir, j;
3159                 for(dir=0; dir<2; dir++){
3160                     for(i=0; i<2; i++){
3161                         for(j=0; j<2; j++){
3162                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3163                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3164                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3165                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3166                         }
3167                     }
3168                 }
3169             }
3170         }
3171     }
3172
         /* Final quantizer decision for this picture (not a dry run). */
3173     if (estimate_qp(s, 0) < 0)
3174         return -1;
3175
3176     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3177         s->qscale= 3; //reduce clipping problems
3178
3179     if (s->out_format == FMT_MJPEG) {
3180         /* for mjpeg, we do include qscale in the matrix */
3181         for(i=1;i<64;i++){
3182             int j= s->dsp.idct_permutation[i];
3183
3184             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3185         }
3186         s->y_dc_scale_table=
3187         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3188         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
             /* only the table entry for qscale 8 is rebuilt, and qscale is then
              * pinned to 8 because the real qscale is already in the matrix */
3189         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3190                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3191         s->qscale= 8;
3192     }
         /* AMV uses fixed tables: constant DC scales (13 luma, 14 chroma) and
          * matrices taken from sp5x_quant_table rows 5*2+0 and 5*2+1. */
3193     if(s->codec_id == CODEC_ID_AMV){
3194         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3195         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3196         for(i=1;i<64;i++){
3197             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3198
3199             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3200             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3201         }
3202         s->y_dc_scale_table= y;
3203         s->c_dc_scale_table= c;
3204         s->intra_matrix[0] = 13;
3205         s->chroma_intra_matrix[0] = 14;
3206         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3207                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3208         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3209                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3210         s->qscale= 8;
3211     }
3212
3213     //FIXME var duplication
3214     s->current_picture_ptr->f.key_frame =
3215     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3216     s->current_picture_ptr->f.pict_type =
3217     s->current_picture.f.pict_type = s->pict_type;
3218
3219     if (s->current_picture.f.key_frame)
3220         s->picture_in_gop_number=0;
3221
         /* Remember the bit position before the header so that header_bits can
          * be computed once the header has been written. */
3222     s->last_bits= put_bits_count(&s->pb);
3223     switch(s->out_format) {
3224     case FMT_MJPEG:
3225         if (CONFIG_MJPEG_ENCODER)
3226             ff_mjpeg_encode_picture_header(s);
3227         break;
3228     case FMT_H261:
3229         if (CONFIG_H261_ENCODER)
3230             ff_h261_encode_picture_header(s, picture_number);
3231         break;
3232     case FMT_H263:
             /* first matching writer wins; plain H.263 is the fallback */
3233         if (CONFIG_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
3234             ff_wmv2_encode_picture_header(s, picture_number);
3235         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3236             msmpeg4_encode_picture_header(s, picture_number);
3237         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3238             mpeg4_encode_picture_header(s, picture_number);
3239         else if (CONFIG_RV10_ENCODER && s->codec_id == CODEC_ID_RV10)
3240             rv10_encode_picture_header(s, picture_number);
3241         else if (CONFIG_RV20_ENCODER && s->codec_id == CODEC_ID_RV20)
3242             rv20_encode_picture_header(s, picture_number);
3243         else if (CONFIG_FLV_ENCODER && s->codec_id == CODEC_ID_FLV1)
3244             ff_flv_encode_picture_header(s, picture_number);
3245         else if (CONFIG_H263_ENCODER)
3246             h263_encode_picture_header(s, picture_number);
3247         break;
3248     case FMT_MPEG1:
3249         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3250             mpeg1_encode_picture_header(s, picture_number);
3251         break;
3252     case FMT_H264:
3253         break;
3254     default:
3255         assert(0);
3256     }
3257     bits= put_bits_count(&s->pb);
3258     s->header_bits= bits - s->last_bits;
3259
         /* Pass the state decided above to the other slice contexts, encode all
          * contexts, then merge the other contexts' results back into s. */
3260     for(i=1; i<context_count; i++){
3261         update_duplicate_context_after_me(s->thread_context[i], s);
3262     }
3263     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3264     for(i=1; i<context_count; i++){
3265         merge_context_after_encode(s, s->thread_context[i]);
3266     }
3267     emms_c();
3268     return 0;
3269 }
3270
3271 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3272     const int intra= s->mb_intra;
3273     int i;
3274
3275     s->dct_count[intra]++;
3276
3277     for(i=0; i<64; i++){
3278         int level= block[i];
3279
3280         if(level){
3281             if(level>0){
3282                 s->dct_error_sum[intra][i] += level;
3283                 level -= s->dct_offset[intra][i];
3284                 if(level<0) level=0;
3285             }else{
3286                 s->dct_error_sum[intra][i] -= level;
3287                 level += s->dct_offset[intra][i];
3288                 if(level>0) level=0;
3289             }
3290             block[i]= level;
3291         }
3292     }
3293 }
3294
/**
 * Transform one 8x8 block with s->dsp.fdct and quantize it, choosing the
 * level of each coefficient by a rate-distortion search.
 *
 * For every scan position up to the last coefficient that survives plain
 * thresholding, up to two candidate levels are recorded: the rounded level
 * and the level one step closer to zero. A dynamic-programming pass over
 * the scan positions then picks the run/level sequence with the lowest
 * score, where score = distortion + code length * lambda.
 *
 * @param s        encoder context (quantizer matrices, VLC length tables,
 *                 lambda2, scan tables)
 * @param block    64 input samples; overwritten with the quantized levels
 * @param n        block index; n < 4 selects y_dc_scale and q_intra_matrix,
 *                 otherwise c_dc_scale and q_chroma_intra_matrix; also the
 *                 index written in s->coded_score
 * @param qscale   quantizer scale
 * @param overflow set to nonzero when a level may exceed s->max_qcoeff
 * @return scan index of the last nonzero coefficient; start_i - 1 (0 for
 *         intra, -1 for inter blocks) when no coefficient from start_i on
 *         is kept
 */
3295 static int dct_quantize_trellis_c(MpegEncContext *s,
3296                                   DCTELEM *block, int n,
3297                                   int qscale, int *overflow){
3298     const int *qmat;
3299     const uint8_t *scantable= s->intra_scantable.scantable;
3300     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3301     int max=0;
3302     unsigned int threshold1, threshold2;
3303     int bias=0;
3304     int run_tab[65];
3305     int level_tab[65];
3306     int score_tab[65];
3307     int survivor[65];
3308     int survivor_count;
3309     int last_run=0;
3310     int last_level=0;
3311     int last_score= 0;
3312     int last_i;
3313     int coeff[2][64];
3314     int coeff_count[64];
3315     int qmul, qadd, start_i, last_non_zero, i, dc;
3316     const int esc_length= s->ac_esc_length;
3317     uint8_t * length;
3318     uint8_t * last_length;
3319     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3320
3321     s->dsp.fdct (block);
3322
3323     if(s->dct_error_sum)
3324         s->denoise_dct(s, block);
         /* dequantizer terms used for the FMT_H263 reconstruction below */
3325     qmul= qscale*16;
3326     qadd= ((qscale-1)|1)*8;
3327
3328     if (s->mb_intra) {
3329         int q;
3330         if (!s->h263_aic) {
3331             if (n < 4)
3332                 q = s->y_dc_scale;
3333             else
3334                 q = s->c_dc_scale;
3335             q = q << 3;
3336         } else{
3337             /* For AIC we skip quant/dequant of INTRADC */
3338             q = 1 << 3;
3339             qadd=0;
3340         }
3341
3342         /* note: block[0] is assumed to be positive */
3343         block[0] = (block[0] + (q >> 1)) / q;
             /* the DC coefficient is done; the search below covers AC only */
3344         start_i = 1;
3345         last_non_zero = 0;
3346         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3347         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3348             bias= 1<<(QMAT_SHIFT-1);
3349         length     = s->intra_ac_vlc_length;
3350         last_length= s->intra_ac_vlc_last_length;
3351     } else {
3352         start_i = 0;
3353         last_non_zero = -1;
3354         qmat = s->q_inter_matrix[qscale];
3355         length     = s->inter_ac_vlc_length;
3356         last_length= s->inter_ac_vlc_last_length;
3357     }
3358     last_i= start_i;
3359
         /* (unsigned)(level+threshold1) > threshold2 holds exactly when
          * |level| > threshold1, i.e. when (bias + |level|) >> QMAT_SHIFT is
          * nonzero */
3360     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3361     threshold2= (threshold1<<1);
3362
         /* find the last scan position whose coefficient quantizes to nonzero */
3363     for(i=63; i>=start_i; i--) {
3364         const int j = scantable[i];
3365         int level = block[j] * qmat[j];
3366
3367         if(((unsigned)(level+threshold1))>threshold2){
3368             last_non_zero = i;
3369             break;
3370         }
3371     }
3372
         /* collect the candidate levels for every position up to last_non_zero */
3373     for(i=start_i; i<=last_non_zero; i++) {
3374         const int j = scantable[i];
3375         int level = block[j] * qmat[j];
3376
3377 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3378 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3379         if(((unsigned)(level+threshold1))>threshold2){
3380             if(level>0){
3381                 level= (bias + level)>>QMAT_SHIFT;
3382                 coeff[0][i]= level;
3383                 coeff[1][i]= level-1;
3384 //                coeff[2][k]= level-2;
3385             }else{
3386                 level= (bias - level)>>QMAT_SHIFT;
3387                 coeff[0][i]= -level;
3388                 coeff[1][i]= -level+1;
3389 //                coeff[2][k]= -level+2;
3390             }
                 /* a magnitude of 1 has no second candidate (it would be 0) */
3391             coeff_count[i]= FFMIN(level, 2);
3392             assert(coeff_count[i]);
                 /* max is an OR of the magnitudes, not a true maximum */
3393             max |=level;
3394         }else{
                 /* below threshold: single candidate, -1 if the sign bit of
                  * level is set and +1 otherwise */
3395             coeff[0][i]= (level>>31)|1;
3396             coeff_count[i]= 1;
3397         }
3398     }
3399
3400     *overflow= s->max_qcoeff < max; //overflow might have happened
3401
         /* nothing survived thresholding: clear the block from start_i on */
3402     if(last_non_zero < start_i){
3403         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3404         return last_non_zero;
3405     }
3406
         /* score_tab[k] is the best score found for coding everything before
          * scan position k; survivor[] lists the positions still worth
          * extending a run from */
3407     score_tab[start_i]= 0;
3408     survivor[0]= start_i;
3409     survivor_count= 1;
3410
3411     for(i=start_i; i<=last_non_zero; i++){
3412         int level_index, j, zero_distortion;
3413         int dct_coeff= FFABS(block[ scantable[i] ]);
3414         int best_score=256*256*256*120;
3415
             /* for these fdct implementations the coefficient is rescaled with
              * ff_inv_aanscales before distortion is measured */
3416         if (   s->dsp.fdct == fdct_ifast
3417 #ifndef FAAN_POSTSCALE
3418             || s->dsp.fdct == ff_faandct
3419 #endif
3420            )
3421             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3422         zero_distortion= dct_coeff*dct_coeff;
3423
3424         for(level_index=0; level_index < coeff_count[i]; level_index++){
3425             int distortion;
3426             int level= coeff[level_index][i];
3427             const int alevel= FFABS(level);
3428             int unquant_coeff;
3429
3430             assert(level);
3431
                 /* reconstruct the value this level would dequantize to */
3432             if(s->out_format == FMT_H263){
3433                 unquant_coeff= alevel*qmul + qadd;
3434             }else{ //MPEG1
3435                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3436                 if(s->mb_intra){
3437                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3438                         unquant_coeff =   (unquant_coeff - 1) | 1;
3439                 }else{
3440                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3441                         unquant_coeff =   (unquant_coeff - 1) | 1;
3442                 }
3443                 unquant_coeff<<= 3;
3444             }
3445
                 /* distortion is measured relative to coding a zero here */
3446             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* bias the level by 64: values in 0..127 index the length
                  * tables, anything else is priced with esc_length */
3447             level+=64;
3448             if((level&(~127)) == 0){
3449                 for(j=survivor_count-1; j>=0; j--){
3450                     int run= i - survivor[j];
3451                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3452                     score += score_tab[i-run];
3453
3454                     if(score < best_score){
3455                         best_score= score;
3456                         run_tab[i+1]= run;
3457                         level_tab[i+1]= level-64;
3458                     }
3459                 }
3460
                     /* for FMT_H263 the final coefficient is priced with
                      * last_length, so the best ending is tracked during
                      * the scan */
3461                 if(s->out_format == FMT_H263){
3462                     for(j=survivor_count-1; j>=0; j--){
3463                         int run= i - survivor[j];
3464                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3465                         score += score_tab[i-run];
3466                         if(score < last_score){
3467                             last_score= score;
3468                             last_run= run;
3469                             last_level= level-64;
3470                             last_i= i+1;
3471                         }
3472                     }
3473                 }
3474             }else{
3475                 distortion += esc_length*lambda;
3476                 for(j=survivor_count-1; j>=0; j--){
3477                     int run= i - survivor[j];
3478                     int score= distortion + score_tab[i-run];
3479
3480                     if(score < best_score){
3481                         best_score= score;
3482                         run_tab[i+1]= run;
3483                         level_tab[i+1]= level-64;
3484                     }
3485                 }
3486
3487                 if(s->out_format == FMT_H263){
3488                   for(j=survivor_count-1; j>=0; j--){
3489                         int run= i - survivor[j];
3490                         int score= distortion + score_tab[i-run];
3491                         if(score < last_score){
3492                             last_score= score;
3493                             last_run= run;
3494                             last_level= level-64;
3495                             last_i= i+1;
3496                         }
3497                     }
3498                 }
3499             }
3500         }
3501
3502         score_tab[i+1]= best_score;
3503
             /* drop survivors whose score is not better than the new position;
              * for longer blocks a margin of lambda is allowed (see note) */
3504         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3505         if(last_non_zero <= 27){
3506             for(; survivor_count; survivor_count--){
3507                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3508                     break;
3509             }
3510         }else{
3511             for(; survivor_count; survivor_count--){
3512                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3513                     break;
3514             }
3515         }
3516
3517         survivor[ survivor_count++ ]= i+1;
3518     }
3519
         /* other formats: choose the end position after the scan, adding
          * lambda*2 to every end position except 0 */
3520     if(s->out_format != FMT_H263){
3521         last_score= 256*256*256*120;
3522         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3523             int score= score_tab[i];
3524             if(i) score += lambda*2; //FIXME exacter?
3525
3526             if(score < last_score){
3527                 last_score= score;
3528                 last_i= i;
3529                 last_level= level_tab[i];
3530                 last_run= run_tab[i];
3531             }
3532         }
3533     }
3534
3535     s->coded_score[n] = last_score;
3536
         /* keep |block[0]| before the coefficients are cleared; it is needed
          * by the single-coefficient case below */
3537     dc= FFABS(block[0]);
3538     last_non_zero= last_i - 1;
3539     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3540
3541     if(last_non_zero < start_i)
3542         return last_non_zero;
3543
         /* only position 0 of an inter block is left: decide again between
          * its candidates and dropping it altogether */
3544     if(last_non_zero == 0 && start_i == 0){
3545         int best_level= 0;
3546         int best_score= dc * dc;
3547
3548         for(i=0; i<coeff_count[0]; i++){
3549             int level= coeff[i][0];
3550             int alevel= FFABS(level);
3551             int unquant_coeff, score, distortion;
3552
3553             if(s->out_format == FMT_H263){
3554                     unquant_coeff= (alevel*qmul + qadd)>>3;
3555             }else{ //MPEG1
3556                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3557                     unquant_coeff =   (unquant_coeff - 1) | 1;
3558             }
3559             unquant_coeff = (unquant_coeff + 4) >> 3;
3560             unquant_coeff<<= 3 + 3;
3561
3562             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3563             level+=64;
3564             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3565             else                    score= distortion + esc_length*lambda;
3566
3567             if(score < best_score){
3568                 best_score= score;
3569                 best_level= level - 64;
3570             }
3571         }
3572         block[0]= best_level;
3573         s->coded_score[n] = best_score - dc*dc;
3574         if(best_level == 0) return -1;
3575         else                return last_non_zero;
3576     }
3577
         /* walk the chosen path backwards through run_tab/level_tab and write
          * the selected levels into the block */
3578     i= last_i;
3579     assert(last_level);
3580
3581     block[ perm_scantable[last_non_zero] ]= last_level;
3582     i -= last_run + 1;
3583
3584     for(; i>start_i; i -= run_tab[i] + 1){
3585         block[ perm_scantable[i-1] ]= level_tab[i];
3586     }
3587
3588     return last_non_zero;
3589 }
3590
3591 //#define REFINE_STATS 1
3592 static int16_t basis[64][64];
3593
3594 static void build_basis(uint8_t *perm){
3595     int i, j, x, y;
3596     emms_c();
3597     for(i=0; i<8; i++){
3598         for(j=0; j<8; j++){
3599             for(y=0; y<8; y++){
3600                 for(x=0; x<8; x++){
3601                     double s= 0.25*(1<<BASIS_SHIFT);
3602                     int index= 8*i + j;
3603                     int perm_index= perm[index];
3604                     if(i==0) s*= sqrt(0.5);
3605                     if(j==0) s*= sqrt(0.5);
3606                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3607                 }
3608             }
3609         }
3610     }
3611 }
3612
3613 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3614                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3615                         int n, int qscale){
3616     int16_t rem[64];
3617     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3618     const uint8_t *scantable= s->intra_scantable.scantable;
3619     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3620 //    unsigned int threshold1, threshold2;
3621 //    int bias=0;
3622     int run_tab[65];
3623     int prev_run=0;
3624     int prev_level=0;
3625     int qmul, qadd, start_i, last_non_zero, i, dc;
3626     uint8_t * length;
3627     uint8_t * last_length;
3628     int lambda;
3629     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3630 #ifdef REFINE_STATS
3631 static int count=0;
3632 static int after_last=0;
3633 static int to_zero=0;
3634 static int from_zero=0;
3635 static int raise=0;
3636 static int lower=0;
3637 static int messed_sign=0;
3638 #endif
3639
3640     if(basis[0][0] == 0)
3641         build_basis(s->dsp.idct_permutation);
3642
3643     qmul= qscale*2;
3644     qadd= (qscale-1)|1;
3645     if (s->mb_intra) {
3646         if (!s->h263_aic) {
3647             if (n < 4)
3648                 q = s->y_dc_scale;
3649             else
3650                 q = s->c_dc_scale;
3651         } else{
3652             /* For AIC we skip quant/dequant of INTRADC */
3653             q = 1;
3654             qadd=0;
3655         }
3656         q <<= RECON_SHIFT-3;
3657         /* note: block[0] is assumed to be positive */
3658         dc= block[0]*q;
3659 //        block[0] = (block[0] + (q >> 1)) / q;
3660         start_i = 1;
3661 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3662 //            bias= 1<<(QMAT_SHIFT-1);
3663         length     = s->intra_ac_vlc_length;
3664         last_length= s->intra_ac_vlc_last_length;
3665     } else {
3666         dc= 0;
3667         start_i = 0;
3668         length     = s->inter_ac_vlc_length;
3669         last_length= s->inter_ac_vlc_last_length;
3670     }
3671     last_non_zero = s->block_last_index[n];
3672
3673 #ifdef REFINE_STATS
3674 {START_TIMER
3675 #endif
3676     dc += (1<<(RECON_SHIFT-1));
3677     for(i=0; i<64; i++){
3678         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3679     }
3680 #ifdef REFINE_STATS
3681 STOP_TIMER("memset rem[]")}
3682 #endif
3683     sum=0;
3684     for(i=0; i<64; i++){
3685         int one= 36;
3686         int qns=4;
3687         int w;
3688
3689         w= FFABS(weight[i]) + qns*one;
3690         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3691
3692         weight[i] = w;
3693 //        w=weight[i] = (63*qns + (w/2)) / w;
3694
3695         assert(w>0);
3696         assert(w<(1<<6));
3697         sum += w*w;
3698     }
3699     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3700 #ifdef REFINE_STATS
3701 {START_TIMER
3702 #endif
3703     run=0;
3704     rle_index=0;
3705     for(i=start_i; i<=last_non_zero; i++){
3706         int j= perm_scantable[i];
3707         const int level= block[j];
3708         int coeff;
3709
3710         if(level){
3711             if(level<0) coeff= qmul*level - qadd;
3712             else        coeff= qmul*level + qadd;
3713             run_tab[rle_index++]=run;
3714             run=0;
3715
3716             s->dsp.add_8x8basis(rem, basis[j], coeff);
3717         }else{
3718             run++;
3719         }
3720     }
3721 #ifdef REFINE_STATS
3722 if(last_non_zero>0){
3723 STOP_TIMER("init rem[]")
3724 }
3725 }
3726
3727 {START_TIMER
3728 #endif
3729     for(;;){
3730         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3731         int best_coeff=0;
3732         int best_change=0;
3733         int run2, best_unquant_change=0, analyze_gradient;
3734 #ifdef REFINE_STATS
3735 {START_TIMER
3736 #endif
3737         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
3738
3739         if(analyze_gradient){
3740 #ifdef REFINE_STATS
3741 {START_TIMER
3742 #endif
3743             for(i=0; i<64; i++){
3744                 int w= weight[i];
3745
3746                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3747             }
3748 #ifdef REFINE_STATS
3749 STOP_TIMER("rem*w*w")}
3750 {START_TIMER
3751 #endif
3752             s->dsp.fdct(d1);
3753 #ifdef REFINE_STATS
3754 STOP_TIMER("dct")}
3755 #endif
3756         }
3757
3758         if(start_i){
3759             const int level= block[0];
3760             int change, old_coeff;
3761
3762             assert(s->mb_intra);
3763
3764             old_coeff= q*level;
3765
3766             for(change=-1; change<=1; change+=2){
3767                 int new_level= level + change;
3768                 int score, new_coeff;
3769
3770                 new_coeff= q*new_level;
3771                 if(new_coeff >= 2048 || new_coeff < 0)
3772                     continue;
3773
3774                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3775                 if(score<best_score){
3776                     best_score= score;
3777                     best_coeff= 0;
3778                     best_change= change;
3779                     best_unquant_change= new_coeff - old_coeff;
3780                 }
3781             }
3782         }
3783
3784         run=0;
3785         rle_index=0;
3786         run2= run_tab[rle_index++];
3787         prev_level=0;
3788         prev_run=0;
3789
3790         for(i=start_i; i<64; i++){
3791             int j= perm_scantable[i];
3792             const int level= block[j];
3793             int change, old_coeff;
3794
3795             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3796                 break;
3797
3798             if(level){
3799                 if(level<0) old_coeff= qmul*level - qadd;
3800                 else        old_coeff= qmul*level + qadd;
3801                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3802             }else{
3803                 old_coeff=0;
3804                 run2--;
3805                 assert(run2>=0 || i >= last_non_zero );
3806             }
3807
3808             for(change=-1; change<=1; change+=2){
3809                 int new_level= level + change;
3810                 int score, new_coeff, unquant_change;
3811
3812                 score=0;
3813                 if(s->avctx->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3814                    continue;
3815
3816                 if(new_level){
3817                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3818                     else            new_coeff= qmul*new_level + qadd;
3819                     if(new_coeff >= 2048 || new_coeff <= -2048)
3820                         continue;
3821                     //FIXME check for overflow
3822
3823                     if(level){
3824                         if(level < 63 && level > -63){
3825                             if(i < last_non_zero)
3826                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3827                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3828                             else
3829                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3830                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3831                         }
3832                     }else{
3833                         assert(FFABS(new_level)==1);
3834
3835                         if(analyze_gradient){
3836                             int g= d1[ scantable[i] ];
3837                             if(g && (g^new_level) >= 0)
3838                                 continue;
3839                         }
3840
3841                         if(i < last_non_zero){
3842                             int next_i= i + run2 + 1;
3843                             int next_level= block[ perm_scantable[next_i] ] + 64;
3844
3845                             if(next_level&(~127))
3846                                 next_level= 0;
3847
3848                             if(next_i < last_non_zero)
3849                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3850                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3851                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3852                             else
3853                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3854                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3855                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3856                         }else{
3857                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3858                             if(prev_level){
3859                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3860                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3861                             }
3862                         }
3863                     }
3864                 }else{
3865                     new_coeff=0;
3866                     assert(FFABS(level)==1);
3867
3868                     if(i < last_non_zero){
3869                         int next_i= i + run2 + 1;
3870                         int next_level= block[ perm_scantable[next_i] ] + 64;
3871
3872                         if(next_level&(~127))
3873                             next_level= 0;
3874
3875                         if(next_i < last_non_zero)
3876                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3877                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3878                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3879                         else
3880                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3881                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3882                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3883                     }else{
3884                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3885                         if(prev_level){
3886                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3887                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3888                         }
3889                     }
3890                 }
3891
3892                 score *= lambda;
3893
3894                 unquant_change= new_coeff - old_coeff;
3895                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3896
3897                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3898                 if(score<best_score){
3899                     best_score= score;
3900                     best_coeff= i;
3901                     best_change= change;
3902                     best_unquant_change= unquant_change;
3903                 }
3904             }
3905             if(level){
3906                 prev_level= level + 64;
3907                 if(prev_level&(~127))
3908                     prev_level= 0;
3909                 prev_run= run;
3910                 run=0;
3911             }else{
3912                 run++;
3913             }
3914         }
3915 #ifdef REFINE_STATS
3916 STOP_TIMER("iterative step")}
3917 #endif
3918
3919         if(best_change){
3920             int j= perm_scantable[ best_coeff ];
3921
3922             block[j] += best_change;
3923
3924             if(best_coeff > last_non_zero){
3925                 last_non_zero= best_coeff;
3926                 assert(block[j]);
3927 #ifdef REFINE_STATS
3928 after_last++;
3929 #endif
3930             }else{
3931 #ifdef REFINE_STATS
3932 if(block[j]){
3933     if(block[j] - best_change){
3934         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3935             raise++;
3936         }else{
3937             lower++;
3938         }
3939     }else{
3940         from_zero++;
3941     }
3942 }else{
3943     to_zero++;
3944 }
3945 #endif
3946                 for(; last_non_zero>=start_i; last_non_zero--){
3947                     if(block[perm_scantable[last_non_zero]])
3948                         break;
3949                 }
3950             }
3951 #ifdef REFINE_STATS
3952 count++;
3953 if(256*256*256*64 % count == 0){
3954     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
3955 }
3956 #endif
3957             run=0;
3958             rle_index=0;
3959             for(i=start_i; i<=last_non_zero; i++){
3960                 int j= perm_scantable[i];
3961                 const int level= block[j];
3962
3963                  if(level){
3964                      run_tab[rle_index++]=run;
3965                      run=0;
3966                  }else{
3967                      run++;
3968                  }
3969             }
3970
3971             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
3972         }else{
3973             break;
3974         }
3975     }
3976 #ifdef REFINE_STATS
3977 if(last_non_zero>0){
3978 STOP_TIMER("iterative search")
3979 }
3980 }
3981 #endif
3982
3983     return last_non_zero;
3984 }
3985
3986 int dct_quantize_c(MpegEncContext *s,
3987                         DCTELEM *block, int n,
3988                         int qscale, int *overflow)
3989 {
3990     int i, j, level, last_non_zero, q, start_i;
3991     const int *qmat;
3992     const uint8_t *scantable= s->intra_scantable.scantable;
3993     int bias;
3994     int max=0;
3995     unsigned int threshold1, threshold2;
3996
3997     s->dsp.fdct (block);
3998
3999     if(s->dct_error_sum)
4000         s->denoise_dct(s, block);
4001
4002     if (s->mb_intra) {
4003         if (!s->h263_aic) {
4004             if (n < 4)
4005                 q = s->y_dc_scale;
4006             else
4007                 q = s->c_dc_scale;
4008             q = q << 3;
4009         } else
4010             /* For AIC we skip quant/dequant of INTRADC */
4011             q = 1 << 3;
4012
4013         /* note: block[0] is assumed to be positive */
4014         block[0] = (block[0] + (q >> 1)) / q;
4015         start_i = 1;
4016         last_non_zero = 0;
4017         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4018         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4019     } else {
4020         start_i = 0;
4021         last_non_zero = -1;
4022         qmat = s->q_inter_matrix[qscale];
4023         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4024     }
4025     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4026     threshold2= (threshold1<<1);
4027     for(i=63;i>=start_i;i--) {
4028         j = scantable[i];
4029         level = block[j] * qmat[j];
4030
4031         if(((unsigned)(level+threshold1))>threshold2){
4032             last_non_zero = i;
4033             break;
4034         }else{
4035             block[j]=0;
4036         }
4037     }
4038     for(i=start_i; i<=last_non_zero; i++) {
4039         j = scantable[i];
4040         level = block[j] * qmat[j];
4041
4042 //        if(   bias+level >= (1<<QMAT_SHIFT)
4043 //           || bias-level >= (1<<QMAT_SHIFT)){
4044         if(((unsigned)(level+threshold1))>threshold2){
4045             if(level>0){
4046                 level= (bias + level)>>QMAT_SHIFT;
4047                 block[j]= level;
4048             }else{
4049                 level= (bias - level)>>QMAT_SHIFT;
4050                 block[j]= -level;
4051             }
4052             max |=level;
4053         }else{
4054             block[j]=0;
4055         }
4056     }
4057     *overflow= s->max_qcoeff < max; //overflow might have happened
4058
4059     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4060     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4061         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4062
4063     return last_non_zero;
4064 }
4065
/* Byte offset of field x inside MpegEncContext, for AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flag set shared by the option tables below (video + encoding parameter).
 * Parenthesized so the expansion stays one operand in any expression. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4068 static const AVOption h263_options[] = {
4069     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4070     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4071     { NULL },
4072 };
4073
4074 static const AVClass h263_class = {
4075     .class_name = "H.263 encoder",
4076     .item_name  = av_default_item_name,
4077     .option     = h263_options,
4078     .version    = LIBAVUTIL_VERSION_INT,
4079 };
4080
4081 AVCodec ff_h263_encoder = {
4082     .name           = "h263",
4083     .type           = AVMEDIA_TYPE_VIDEO,
4084     .id             = CODEC_ID_H263,
4085     .priv_data_size = sizeof(MpegEncContext),
4086     .init           = MPV_encode_init,
4087     .encode         = MPV_encode_picture,
4088     .close          = MPV_encode_end,
4089     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4090     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4091     .priv_class     = &h263_class,
4092 };
4093
4094 static const AVOption h263p_options[] = {
4095     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4096     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4097     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4098     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4099     { NULL },
4100 };
4101 static const AVClass h263p_class = {
4102     .class_name = "H.263p encoder",
4103     .item_name  = av_default_item_name,
4104     .option     = h263p_options,
4105     .version    = LIBAVUTIL_VERSION_INT,
4106 };
4107
4108 AVCodec ff_h263p_encoder = {
4109     .name           = "h263p",
4110     .type           = AVMEDIA_TYPE_VIDEO,
4111     .id             = CODEC_ID_H263P,
4112     .priv_data_size = sizeof(MpegEncContext),
4113     .init           = MPV_encode_init,
4114     .encode         = MPV_encode_picture,
4115     .close          = MPV_encode_end,
4116     .capabilities = CODEC_CAP_SLICE_THREADS,
4117     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4118     .long_name= NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4119     .priv_class     = &h263p_class,
4120 };
4121
4122 AVCodec ff_msmpeg4v2_encoder = {
4123     .name           = "msmpeg4v2",
4124     .type           = AVMEDIA_TYPE_VIDEO,
4125     .id             = CODEC_ID_MSMPEG4V2,
4126     .priv_data_size = sizeof(MpegEncContext),
4127     .init           = MPV_encode_init,
4128     .encode         = MPV_encode_picture,
4129     .close          = MPV_encode_end,
4130     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4131     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4132 };
4133
4134 AVCodec ff_msmpeg4v3_encoder = {
4135     .name           = "msmpeg4",
4136     .type           = AVMEDIA_TYPE_VIDEO,
4137     .id             = CODEC_ID_MSMPEG4V3,
4138     .priv_data_size = sizeof(MpegEncContext),
4139     .init           = MPV_encode_init,
4140     .encode         = MPV_encode_picture,
4141     .close          = MPV_encode_end,
4142     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4143     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4144 };
4145
4146 AVCodec ff_wmv1_encoder = {
4147     .name           = "wmv1",
4148     .type           = AVMEDIA_TYPE_VIDEO,
4149     .id             = CODEC_ID_WMV1,
4150     .priv_data_size = sizeof(MpegEncContext),
4151     .init           = MPV_encode_init,
4152     .encode         = MPV_encode_picture,
4153     .close          = MPV_encode_end,
4154     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4155     .long_name= NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4156 };