git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "dsputil.h"
  41 #include "idctdsp.h"
  42 #include "mpeg12.h"
  43 #include "mpegvideo.h"
  44 #include "h261.h"
  45 #include "h263.h"
  46 #include "mjpegenc_common.h"
  47 #include "mathops.h"
  48 #include "mpegutils.h"
  49 #include "mjpegenc.h"
  50 #include "msmpeg4.h"
  51 #include "qpeldsp.h"
  52 #include "faandct.h"
  53 #include "thread.h"
  54 #include "aandcttab.h"
  55 #include "flv.h"
  56 #include "mpeg4video.h"
  57 #include "internal.h"
  58 #include "bytestream.h"
  59 #include <limits.h>
  60 #include "sp5x.h"
  61
  62 static int encode_picture(MpegEncContext *s, int picture_number);
  63 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
  64 static int sse_mb(MpegEncContext *s);
  65 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
  66 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
  67
  68 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
  69 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
  70
  71 const AVOption ff_mpv_generic_options[] = {
  72     FF_MPV_COMMON_OPTS
  73     { NULL },
  74 };
  75
  76 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
  77                        uint16_t (*qmat16)[2][64],
  78                        const uint16_t *quant_matrix,
  79                        int bias, int qmin, int qmax, int intra)
  80 {
  81     DSPContext *dsp = &s->dsp;
  82     int qscale;
  83     int shift = 0;
  84
  85     for (qscale = qmin; qscale <= qmax; qscale++) {
  86         int i;
  87         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
  88             dsp->fdct == ff_jpeg_fdct_islow_10 ||
  89             dsp->fdct == ff_faandct) {
  90             for (i = 0; i < 64; i++) {
  91                 const int j = s->idsp.idct_permutation[i];
  92                 /* 16 <= qscale * quant_matrix[i] <= 7905
  93                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
  94                  *             19952 <=              x  <= 249205026
  95                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
  96                  *           3444240 >= (1 << 36) / (x) >= 275 */
  97
  98                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
  99                                         (qscale * quant_matrix[j]));
 100             }
 101         } else if (dsp->fdct == ff_fdct_ifast) {
 102             for (i = 0; i < 64; i++) {
 103                 const int j = s->idsp.idct_permutation[i];
 104                 /* 16 <= qscale * quant_matrix[i] <= 7905
 105                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 106                  *             19952 <=              x  <= 249205026
 107                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 108                  *           3444240 >= (1 << 36) / (x) >= 275 */
 109
 110                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
 111                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
 112             }
 113         } else {
 114             for (i = 0; i < 64; i++) {
 115                 const int j = s->idsp.idct_permutation[i];
 116                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 117                  * Assume x = qscale * quant_matrix[i]
 118                  * So             16 <=              x  <= 7905
 119                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 120                  * so          32768 >= (1 << 19) / (x) >= 67 */
 121                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
 122                                         (qscale * quant_matrix[j]));
 123                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 124                 //                    (qscale * quant_matrix[i]);
 125                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
 126                                        (qscale * quant_matrix[j]);
 127
 128                 if (qmat16[qscale][0][i] == 0 ||
 129                     qmat16[qscale][0][i] == 128 * 256)
 130                     qmat16[qscale][0][i] = 128 * 256 - 1;
 131                 qmat16[qscale][1][i] =
 132                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 133                                 qmat16[qscale][0][i]);
 134             }
 135         }
 136
 137         for (i = intra; i < 64; i++) {
 138             int64_t max = 8191;
 139             if (dsp->fdct == ff_fdct_ifast) {
 140                 max = (8191LL * ff_aanscales[i]) >> 14;
 141             }
 142             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 143                 shift++;
 144             }
 145         }
 146     }
 147     if (shift) {
 148         av_log(NULL, AV_LOG_INFO,
 149                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 150                QMAT_SHIFT - shift);
 151     }
 152 }
 153
 154 static inline void update_qscale(MpegEncContext *s)
 155 {
 156     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 157                 (FF_LAMBDA_SHIFT + 7);
 158     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 159
 160     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 161                  FF_LAMBDA_SHIFT;
 162 }
 163
 164 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 165 {
 166     int i;
 167
 168     if (matrix) {
 169         put_bits(pb, 1, 1);
 170         for (i = 0; i < 64; i++) {
 171             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 172         }
 173     } else
 174         put_bits(pb, 1, 0);
 175 }
 176
 177 /**
 178  * init s->current_picture.qscale_table from s->lambda_table
 179  */
 180 void ff_init_qscale_tab(MpegEncContext *s)
 181 {
 182     int8_t * const qscale_table = s->current_picture.qscale_table;
 183     int i;
 184
 185     for (i = 0; i < s->mb_num; i++) {
 186         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 187         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 188         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 189                                                   s->avctx->qmax);
 190     }
 191 }
 192
 193 static void update_duplicate_context_after_me(MpegEncContext *dst,
 194                                               MpegEncContext *src)
 195 {
 196 #define COPY(a) dst->a= src->a
 197     COPY(pict_type);
 198     COPY(current_picture);
 199     COPY(f_code);
 200     COPY(b_code);
 201     COPY(qscale);
 202     COPY(lambda);
 203     COPY(lambda2);
 204     COPY(picture_in_gop_number);
 205     COPY(gop_picture_number);
 206     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 207     COPY(progressive_frame);    // FIXME don't set in encode_header
 208     COPY(partitioned_frame);    // FIXME don't set in encode_header
 209 #undef COPY
 210 }
 211
 212 /**
 213  * Set the given MpegEncContext to defaults for encoding.
 214  * the changed fields will not depend upon the prior state of the MpegEncContext.
 215  */
 216 static void MPV_encode_defaults(MpegEncContext *s)
 217 {
 218     int i;
 219     ff_MPV_common_defaults(s);
 220
 221     for (i = -16; i < 16; i++) {
 222         default_fcode_tab[i + MAX_MV] = 1;
 223     }
 224     s->me.mv_penalty = default_mv_penalty;
 225     s->fcode_tab     = default_fcode_tab;
 226
 227     s->input_picture_number  = 0;
 228     s->picture_in_gop_number = 0;
 229 }
 230
 231 av_cold int ff_dct_encode_init(MpegEncContext *s) {
 232     if (ARCH_X86)
 233         ff_dct_encode_init_x86(s);
 234
 235     if (CONFIG_H263_ENCODER)
 236         ff_h263dsp_init(&s->h263dsp);
 237     if (!s->dct_quantize)
 238         s->dct_quantize = ff_dct_quantize_c;
 239     if (!s->denoise_dct)
 240         s->denoise_dct  = denoise_dct_c;
 241     s->fast_dct_quantize = s->dct_quantize;
 242     if (s->avctx->trellis)
 243         s->dct_quantize  = dct_quantize_trellis_c;
 244
 245     return 0;
 246 }
 247
 248 /* init video encoder */
 249 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
 250 {
 251     MpegEncContext *s = avctx->priv_data;
 252     int i, ret, format_supported;
 253
 254     MPV_encode_defaults(s);
 255
 256     switch (avctx->codec_id) {
 257     case AV_CODEC_ID_MPEG2VIDEO:
 258         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 259             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 260             av_log(avctx, AV_LOG_ERROR,
 261                    "only YUV420 and YUV422 are supported\n");
 262             return -1;
 263         }
 264         break;
 265     case AV_CODEC_ID_MJPEG:
 266     case AV_CODEC_ID_AMV:
 267         format_supported = 0;
 268         /* JPEG color space */
 269         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
 270             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
 271             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
 272             (avctx->color_range == AVCOL_RANGE_JPEG &&
 273              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 274               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 275               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
 276             format_supported = 1;
 277         /* MPEG color space */
 278         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
 279                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 280                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 281                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
 282             format_supported = 1;
 283
 284         if (!format_supported) {
 285             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 286             return -1;
 287         }
 288         break;
 289     default:
 290         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 291             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 292             return -1;
 293         }
 294     }
 295
 296     switch (avctx->pix_fmt) {
 297     case AV_PIX_FMT_YUVJ444P:
 298     case AV_PIX_FMT_YUV444P:
 299         s->chroma_format = CHROMA_444;
 300         break;
 301     case AV_PIX_FMT_YUVJ422P:
 302     case AV_PIX_FMT_YUV422P:
 303         s->chroma_format = CHROMA_422;
 304         break;
 305     case AV_PIX_FMT_YUVJ420P:
 306     case AV_PIX_FMT_YUV420P:
 307     default:
 308         s->chroma_format = CHROMA_420;
 309         break;
 310     }
 311
 312     s->bit_rate = avctx->bit_rate;
 313     s->width    = avctx->width;
 314     s->height   = avctx->height;
 315     if (avctx->gop_size > 600 &&
 316         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 317         av_log(avctx, AV_LOG_WARNING,
 318                "keyframe interval too large!, reducing it from %d to %d\n",
 319                avctx->gop_size, 600);
 320         avctx->gop_size = 600;
 321     }
 322     s->gop_size     = avctx->gop_size;
 323     s->avctx        = avctx;
 324     s->flags        = avctx->flags;
 325     s->flags2       = avctx->flags2;
 326     if (avctx->max_b_frames > MAX_B_FRAMES) {
 327         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 328                "is %d.\n", MAX_B_FRAMES);
 329         avctx->max_b_frames = MAX_B_FRAMES;
 330     }
 331     s->max_b_frames = avctx->max_b_frames;
 332     s->codec_id     = avctx->codec->id;
 333     s->strict_std_compliance = avctx->strict_std_compliance;
 334     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 335     s->mpeg_quant         = avctx->mpeg_quant;
 336     s->rtp_mode           = !!avctx->rtp_payload_size;
 337     s->intra_dc_precision = avctx->intra_dc_precision;
 338     s->user_specified_pts = AV_NOPTS_VALUE;
 339
 340     if (s->gop_size <= 1) {
 341         s->intra_only = 1;
 342         s->gop_size   = 12;
 343     } else {
 344         s->intra_only = 0;
 345     }
 346
 347     s->me_method = avctx->me_method;
 348
 349     /* Fixed QSCALE */
 350     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 351
 352     s->adaptive_quant = (s->avctx->lumi_masking ||
 353                          s->avctx->dark_masking ||
 354                          s->avctx->temporal_cplx_masking ||
 355                          s->avctx->spatial_cplx_masking  ||
 356                          s->avctx->p_masking      ||
 357                          s->avctx->border_masking ||
 358                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 359                         !s->fixed_qscale;
 360
 361     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
 362
 363     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 364         switch(avctx->codec_id) {
 365         case AV_CODEC_ID_MPEG1VIDEO:
 366         case AV_CODEC_ID_MPEG2VIDEO:
 367             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
 368             break;
 369         case AV_CODEC_ID_MPEG4:
 370         case AV_CODEC_ID_MSMPEG4V1:
 371         case AV_CODEC_ID_MSMPEG4V2:
 372         case AV_CODEC_ID_MSMPEG4V3:
 373             if       (avctx->rc_max_rate >= 15000000) {
 374                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
 375             } else if(avctx->rc_max_rate >=  2000000) {
 376                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
 377             } else if(avctx->rc_max_rate >=   384000) {
 378                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
 379             } else
 380                 avctx->rc_buffer_size = 40;
 381             avctx->rc_buffer_size *= 16384;
 382             break;
 383         }
 384         if (avctx->rc_buffer_size) {
 385             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
 386         }
 387     }
 388
 389     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
 390         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
 391         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
 392             return -1;
 393     }
 394
 395     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 396         av_log(avctx, AV_LOG_INFO,
 397                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 398     }
 399
 400     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 401         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 402         return -1;
 403     }
 404
 405     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 406         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
 407         return -1;
 408     }
 409
 410     if (avctx->rc_max_rate &&
 411         avctx->rc_max_rate == avctx->bit_rate &&
 412         avctx->rc_max_rate != avctx->rc_min_rate) {
 413         av_log(avctx, AV_LOG_INFO,
 414                "impossible bitrate constraints, this will fail\n");
 415     }
 416
 417     if (avctx->rc_buffer_size &&
 418         avctx->bit_rate * (int64_t)avctx->time_base.num >
 419             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 420         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 421         return -1;
 422     }
 423
 424     if (!s->fixed_qscale &&
 425         avctx->bit_rate * av_q2d(avctx->time_base) >
 426             avctx->bit_rate_tolerance) {
 427         av_log(avctx, AV_LOG_WARNING,
 428                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
 429         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
 430     }
 431
 432     if (s->avctx->rc_max_rate &&
 433         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 434         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 435          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 436         90000LL * (avctx->rc_buffer_size - 1) >
 437             s->avctx->rc_max_rate * 0xFFFFLL) {
 438         av_log(avctx, AV_LOG_INFO,
 439                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 440                "specified vbv buffer is too large for the given bitrate!\n");
 441     }
 442
 443     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
 444         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 445         s->codec_id != AV_CODEC_ID_FLV1) {
 446         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 447         return -1;
 448     }
 449
 450     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 451         av_log(avctx, AV_LOG_ERROR,
 452                "OBMC is only supported with simple mb decision\n");
 453         return -1;
 454     }
 455
 456     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 457         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 458         return -1;
 459     }
 460
 461     if (s->max_b_frames                    &&
 462         s->codec_id != AV_CODEC_ID_MPEG4      &&
 463         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 464         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 465         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 466         return -1;
 467     }
 468     if (s->max_b_frames < 0) {
 469         av_log(avctx, AV_LOG_ERROR,
 470                "max b frames must be 0 or positive for mpegvideo based encoders\n");
 471         return -1;
 472     }
 473
 474     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 475          s->codec_id == AV_CODEC_ID_H263  ||
 476          s->codec_id == AV_CODEC_ID_H263P) &&
 477         (avctx->sample_aspect_ratio.num > 255 ||
 478          avctx->sample_aspect_ratio.den > 255)) {
 479         av_log(avctx, AV_LOG_WARNING,
 480                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
 481                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 482         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
 483                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
 484     }
 485
 486     if ((s->codec_id == AV_CODEC_ID_H263  ||
 487          s->codec_id == AV_CODEC_ID_H263P) &&
 488         (avctx->width  > 2048 ||
 489          avctx->height > 1152 )) {
 490         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
 491         return -1;
 492     }
 493     if ((s->codec_id == AV_CODEC_ID_H263  ||
 494          s->codec_id == AV_CODEC_ID_H263P) &&
 495         ((avctx->width &3) ||
 496          (avctx->height&3) )) {
 497         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
 498         return -1;
 499     }
 500
 501     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
 502         (avctx->width  > 4095 ||
 503          avctx->height > 4095 )) {
 504         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
 505         return -1;
 506     }
 507
 508     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
 509         (avctx->width  > 16383 ||
 510          avctx->height > 16383 )) {
 511         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
 512         return -1;
 513     }
 514
 515     if (s->codec_id == AV_CODEC_ID_RV10 &&
 516         (avctx->width &15 ||
 517          avctx->height&15 )) {
 518         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
 519         return AVERROR(EINVAL);
 520     }
 521
 522     if (s->codec_id == AV_CODEC_ID_RV20 &&
 523         (avctx->width &3 ||
 524          avctx->height&3 )) {
 525         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
 526         return AVERROR(EINVAL);
 527     }
 528
 529     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
 530          s->codec_id == AV_CODEC_ID_WMV2) &&
 531          avctx->width & 1) {
 532          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
 533          return -1;
 534     }
 535
 536     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 537         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 538         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 539         return -1;
 540     }
 541
 542     // FIXME mpeg2 uses that too
 543     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
 544                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
 545         av_log(avctx, AV_LOG_ERROR,
 546                "mpeg2 style quantization not supported by codec\n");
 547         return -1;
 548     }
 549
 550     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 551         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 552         return -1;
 553     }
 554
 555     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 556         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 557         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 558         return -1;
 559     }
 560
 561     if (s->avctx->scenechange_threshold < 1000000000 &&
 562         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
 563         av_log(avctx, AV_LOG_ERROR,
 564                "closed gop with scene change detection are not supported yet, "
 565                "set threshold to 1000000000\n");
 566         return -1;
 567     }
 568
 569     if (s->flags & CODEC_FLAG_LOW_DELAY) {
 570         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 571             av_log(avctx, AV_LOG_ERROR,
 572                   "low delay forcing is only available for mpeg2\n");
 573             return -1;
 574         }
 575         if (s->max_b_frames != 0) {
 576             av_log(avctx, AV_LOG_ERROR,
 577                    "b frames cannot be used with low delay\n");
 578             return -1;
 579         }
 580     }
 581
 582     if (s->q_scale_type == 1) {
 583         if (avctx->qmax > 12) {
 584             av_log(avctx, AV_LOG_ERROR,
 585                    "non linear quant only supports qmax <= 12 currently\n");
 586             return -1;
 587         }
 588     }
 589
 590     if (s->avctx->thread_count > 1         &&
 591         s->codec_id != AV_CODEC_ID_MPEG4      &&
 592         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 593         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 594         s->codec_id != AV_CODEC_ID_MJPEG      &&
 595         (s->codec_id != AV_CODEC_ID_H263P)) {
 596         av_log(avctx, AV_LOG_ERROR,
 597                "multi threaded encoding not supported by codec\n");
 598         return -1;
 599     }
 600
 601     if (s->avctx->thread_count < 1) {
 602         av_log(avctx, AV_LOG_ERROR,
 603                "automatic thread number detection not supported by codec, "
 604                "patch welcome\n");
 605         return -1;
 606     }
 607
 608     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
 609         s->rtp_mode = 1;
 610
 611     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
 612         s->h263_slice_structured = 1;
 613
 614     if (!avctx->time_base.den || !avctx->time_base.num) {
 615         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 616         return -1;
 617     }
 618
 619     i = (INT_MAX / 2 + 128) >> 8;
 620     if (avctx->mb_threshold >= i) {
 621         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
 622                i - 1);
 623         return -1;
 624     }
 625
 626     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 627         av_log(avctx, AV_LOG_INFO,
 628                "notice: b_frame_strategy only affects the first pass\n");
 629         avctx->b_frame_strategy = 0;
 630     }
 631
 632     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 633     if (i > 1) {
 634         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 635         avctx->time_base.den /= i;
 636         avctx->time_base.num /= i;
 637         //return -1;
 638     }
 639
 640     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
 641         // (a + x * 3 / 8) / x
 642         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 643         s->inter_quant_bias = 0;
 644     } else {
 645         s->intra_quant_bias = 0;
 646         // (a - x / 4) / x
 647         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 648     }
 649
 650     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
 651         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
 652         return AVERROR(EINVAL);
 653     }
 654
 655     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 656         s->intra_quant_bias = avctx->intra_quant_bias;
 657     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 658         s->inter_quant_bias = avctx->inter_quant_bias;
 659
 660     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
 661
 662     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 663         s->avctx->time_base.den > (1 << 16) - 1) {
 664         av_log(avctx, AV_LOG_ERROR,
 665                "timebase %d/%d not supported by MPEG 4 standard, "
 666                "the maximum admitted value for the timebase denominator "
 667                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 668                (1 << 16) - 1);
 669         return -1;
 670     }
 671     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 672
 673     switch (avctx->codec->id) {
 674     case AV_CODEC_ID_MPEG1VIDEO:
 675         s->out_format = FMT_MPEG1;
 676         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 677         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 678         break;
 679     case AV_CODEC_ID_MPEG2VIDEO:
 680         s->out_format = FMT_MPEG1;
 681         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 682         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 683         s->rtp_mode   = 1;
 684         break;
 685     case AV_CODEC_ID_MJPEG:
 686     case AV_CODEC_ID_AMV:
 687         s->out_format = FMT_MJPEG;
 688         s->intra_only = 1; /* force intra only for jpeg */
 689         if (!CONFIG_MJPEG_ENCODER ||
 690             ff_mjpeg_encode_init(s) < 0)
 691             return -1;
 692         avctx->delay = 0;
 693         s->low_delay = 1;
 694         break;
 695     case AV_CODEC_ID_H261:
 696         if (!CONFIG_H261_ENCODER)
 697             return -1;
 698         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 699             av_log(avctx, AV_LOG_ERROR,
 700                    "The specified picture size of %dx%d is not valid for the "
 701                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 702                     s->width, s->height);
 703             return -1;
 704         }
 705         s->out_format = FMT_H261;
 706         avctx->delay  = 0;
 707         s->low_delay  = 1;
 708         break;
 709     case AV_CODEC_ID_H263:
 710         if (!CONFIG_H263_ENCODER)
 711             return -1;
 712         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 713                              s->width, s->height) == 8) {
 714             av_log(avctx, AV_LOG_ERROR,
 715                    "The specified picture size of %dx%d is not valid for "
 716                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 717                    "352x288, 704x576, and 1408x1152. "
 718                    "Try H.263+.\n", s->width, s->height);
 719             return -1;
 720         }
 721         s->out_format = FMT_H263;
 722         avctx->delay  = 0;
 723         s->low_delay  = 1;
 724         break;
 725     case AV_CODEC_ID_H263P:
 726         s->out_format = FMT_H263;
 727         s->h263_plus  = 1;
 728         /* Fx */
 729         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 730         s->modified_quant  = s->h263_aic;
 731         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 732         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 733
 734         /* /Fx */
 735         /* These are just to be sure */
 736         avctx->delay = 0;
 737         s->low_delay = 1;
 738         break;
 739     case AV_CODEC_ID_FLV1:
 740         s->out_format      = FMT_H263;
 741         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 742         s->unrestricted_mv = 1;
 743         s->rtp_mode  = 0; /* don't allow GOB */
 744         avctx->delay = 0;
 745         s->low_delay = 1;
 746         break;
 747     case AV_CODEC_ID_RV10:
 748         s->out_format = FMT_H263;
 749         avctx->delay  = 0;
 750         s->low_delay  = 1;
 751         break;
 752     case AV_CODEC_ID_RV20:
 753         s->out_format      = FMT_H263;
 754         avctx->delay       = 0;
 755         s->low_delay       = 1;
 756         s->modified_quant  = 1;
 757         s->h263_aic        = 1;
 758         s->h263_plus       = 1;
 759         s->loop_filter     = 1;
 760         s->unrestricted_mv = 0;
 761         break;
 762     case AV_CODEC_ID_MPEG4:
 763         s->out_format      = FMT_H263;
 764         s->h263_pred       = 1;
 765         s->unrestricted_mv = 1;
 766         s->low_delay       = s->max_b_frames ? 0 : 1;
 767         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 768         break;
 769     case AV_CODEC_ID_MSMPEG4V2:
 770         s->out_format      = FMT_H263;
 771         s->h263_pred       = 1;
 772         s->unrestricted_mv = 1;
 773         s->msmpeg4_version = 2;
 774         avctx->delay       = 0;
 775         s->low_delay       = 1;
 776         break;
 777     case AV_CODEC_ID_MSMPEG4V3:
 778         s->out_format        = FMT_H263;
 779         s->h263_pred         = 1;
 780         s->unrestricted_mv   = 1;
 781         s->msmpeg4_version   = 3;
 782         s->flipflop_rounding = 1;
 783         avctx->delay         = 0;
 784         s->low_delay         = 1;
 785         break;
 786     case AV_CODEC_ID_WMV1:
 787         s->out_format        = FMT_H263;
 788         s->h263_pred         = 1;
 789         s->unrestricted_mv   = 1;
 790         s->msmpeg4_version   = 4;
 791         s->flipflop_rounding = 1;
 792         avctx->delay         = 0;
 793         s->low_delay         = 1;
 794         break;
 795     case AV_CODEC_ID_WMV2:
 796         s->out_format        = FMT_H263;
 797         s->h263_pred         = 1;
 798         s->unrestricted_mv   = 1;
 799         s->msmpeg4_version   = 5;
 800         s->flipflop_rounding = 1;
 801         avctx->delay         = 0;
 802         s->low_delay         = 1;
 803         break;
 804     default:
 805         return -1;
 806     }
 807
 808     avctx->has_b_frames = !s->low_delay;
 809
 810     s->encoding = 1;
 811
 812     s->progressive_frame    =
 813     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 814                                                 CODEC_FLAG_INTERLACED_ME) ||
 815                                 s->alternate_scan);
 816
 817     /* init */
 818     if (ff_MPV_common_init(s) < 0)
 819         return -1;
 820
 821     ff_qpeldsp_init(&s->qdsp);
 822
 823     s->avctx->coded_frame = s->current_picture.f;
 824
 825     if (s->msmpeg4_version) {
 826         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 827                           2 * 2 * (MAX_LEVEL + 1) *
 828                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 829     }
 830     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 831
 832     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 833     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
 834     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 835     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 836     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 837     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 838     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 839                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 840     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 841                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 842
 843     if (s->avctx->noise_reduction) {
 844         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 845                           2 * 64 * sizeof(uint16_t), fail);
 846     }
 847
 848     ff_dct_encode_init(s);
 849
 850     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 851         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 852
 853     s->quant_precision = 5;
 854
 855     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
 856     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
 857
 858     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 859         ff_h261_encode_init(s);
 860     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 861         ff_h263_encode_init(s);
 862     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 863         ff_msmpeg4_encode_init(s);
 864     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 865         && s->out_format == FMT_MPEG1)
 866         ff_mpeg1_encode_init(s);
 867
 868     /* init q matrix */
 869     for (i = 0; i < 64; i++) {
 870         int j = s->idsp.idct_permutation[i];
 871         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 872             s->mpeg_quant) {
 873             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 874             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 875         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 876             s->intra_matrix[j] =
 877             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 878         } else {
 879             /* mpeg1/2 */
 880             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 881             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 882         }
 883         if (s->avctx->intra_matrix)
 884             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 885         if (s->avctx->inter_matrix)
 886             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 887     }
 888
 889     /* precompute matrix */
 890     /* for mjpeg, we do include qscale in the matrix */
 891     if (s->out_format != FMT_MJPEG) {
 892         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
 893                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 894                           31, 1);
 895         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
 896                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 897                           31, 0);
 898     }
 899
 900     if (ff_rate_control_init(s) < 0)
 901         return -1;
 902
 903 #if FF_API_ERROR_RATE
 904     FF_DISABLE_DEPRECATION_WARNINGS
 905     if (avctx->error_rate)
 906         s->error_rate = avctx->error_rate;
 907     FF_ENABLE_DEPRECATION_WARNINGS;
 908 #endif
 909
 910 #if FF_API_NORMALIZE_AQP
 911     FF_DISABLE_DEPRECATION_WARNINGS
 912     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 913         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 914     FF_ENABLE_DEPRECATION_WARNINGS;
 915 #endif
 916
 917 #if FF_API_MV0
 918     FF_DISABLE_DEPRECATION_WARNINGS
 919     if (avctx->flags & CODEC_FLAG_MV0)
 920         s->mpv_flags |= FF_MPV_FLAG_MV0;
 921     FF_ENABLE_DEPRECATION_WARNINGS
 922 #endif
 923
 924     if (avctx->b_frame_strategy == 2) {
 925         for (i = 0; i < s->max_b_frames + 2; i++) {
 926             s->tmp_frames[i] = av_frame_alloc();
 927             if (!s->tmp_frames[i])
 928                 return AVERROR(ENOMEM);
 929
 930             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 931             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
 932             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
 933
 934             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 935             if (ret < 0)
 936                 return ret;
 937         }
 938     }
 939
 940     return 0;
 941 fail:
 942     ff_MPV_encode_end(avctx);
 943     return AVERROR_UNKNOWN;
 944 }
 945
 946 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
 947 {
 948     MpegEncContext *s = avctx->priv_data;
 949     int i;
 950
 951     ff_rate_control_uninit(s);
 952
 953     ff_MPV_common_end(s);
 954     if (CONFIG_MJPEG_ENCODER &&
 955         s->out_format == FMT_MJPEG)
 956         ff_mjpeg_encode_close(s);
 957
 958     av_freep(&avctx->extradata);
 959
 960     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
 961         av_frame_free(&s->tmp_frames[i]);
 962
 963     ff_free_picture_tables(&s->new_picture);
 964     ff_mpeg_unref_picture(s, &s->new_picture);
 965
 966     av_freep(&s->avctx->stats_out);
 967     av_freep(&s->ac_stats);
 968
 969     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
 970     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
 971     s->q_chroma_intra_matrix=   NULL;
 972     s->q_chroma_intra_matrix16= NULL;
 973     av_freep(&s->q_intra_matrix);
 974     av_freep(&s->q_inter_matrix);
 975     av_freep(&s->q_intra_matrix16);
 976     av_freep(&s->q_inter_matrix16);
 977     av_freep(&s->input_picture);
 978     av_freep(&s->reordered_input_picture);
 979     av_freep(&s->dct_offset);
 980
 981     return 0;
 982 }
 983
 984 static int get_sae(uint8_t *src, int ref, int stride)
 985 {
 986     int x,y;
 987     int acc = 0;
 988
 989     for (y = 0; y < 16; y++) {
 990         for (x = 0; x < 16; x++) {
 991             acc += FFABS(src[x + y * stride] - ref);
 992         }
 993     }
 994
 995     return acc;
 996 }
 997
 998 static int get_intra_count(MpegEncContext *s, uint8_t *src,
 999                            uint8_t *ref, int stride)
1000 {
1001     int x, y, w, h;
1002     int acc = 0;
1003
1004     w = s->width  & ~15;
1005     h = s->height & ~15;
1006
1007     for (y = 0; y < h; y += 16) {
1008         for (x = 0; x < w; x += 16) {
1009             int offset = x + y * stride;
1010             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
1011                                      16);
1012             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
1013             int sae  = get_sae(src + offset, mean, stride);
1014
1015             acc += sae + 500 < sad;
1016         }
1017     }
1018     return acc;
1019 }
1020
1021
1022 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1023 {
1024     Picture *pic = NULL;
1025     int64_t pts;
1026     int i, display_picture_number = 0, ret;
1027     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1028                                                  (s->low_delay ? 0 : 1);
1029     int direct = 1;
1030
1031     if (pic_arg) {
1032         pts = pic_arg->pts;
1033         display_picture_number = s->input_picture_number++;
1034
1035         if (pts != AV_NOPTS_VALUE) {
1036             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1037                 int64_t last = s->user_specified_pts;
1038
1039                 if (pts <= last) {
1040                     av_log(s->avctx, AV_LOG_ERROR,
1041                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1042                            pts, last);
1043                     return AVERROR(EINVAL);
1044                 }
1045
1046                 if (!s->low_delay && display_picture_number == 1)
1047                     s->dts_delta = pts - last;
1048             }
1049             s->user_specified_pts = pts;
1050         } else {
1051             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1052                 s->user_specified_pts =
1053                 pts = s->user_specified_pts + 1;
1054                 av_log(s->avctx, AV_LOG_INFO,
1055                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1056                        pts);
1057             } else {
1058                 pts = display_picture_number;
1059             }
1060         }
1061     }
1062
1063     if (pic_arg) {
1064         if (!pic_arg->buf[0])
1065             direct = 0;
1066         if (pic_arg->linesize[0] != s->linesize)
1067             direct = 0;
1068         if (pic_arg->linesize[1] != s->uvlinesize)
1069             direct = 0;
1070         if (pic_arg->linesize[2] != s->uvlinesize)
1071             direct = 0;
1072         if ((s->width & 15) || (s->height & 15))
1073             direct = 0;
1074         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1075             direct = 0;
1076         if (s->linesize & (STRIDE_ALIGN-1))
1077             direct = 0;
1078
1079         av_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1080                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1081
1082         if (direct) {
1083             i = ff_find_unused_picture(s, 1);
1084             if (i < 0)
1085                 return i;
1086
1087             pic = &s->picture[i];
1088             pic->reference = 3;
1089
1090             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1091                 return ret;
1092             if (ff_alloc_picture(s, pic, 1) < 0) {
1093                 return -1;
1094             }
1095         } else {
1096             i = ff_find_unused_picture(s, 0);
1097             if (i < 0)
1098                 return i;
1099
1100             pic = &s->picture[i];
1101             pic->reference = 3;
1102
1103             if (ff_alloc_picture(s, pic, 0) < 0) {
1104                 return -1;
1105             }
1106
1107             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1108                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1109                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1110                 // empty
1111             } else {
1112                 int h_chroma_shift, v_chroma_shift;
1113                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1114                                                  &h_chroma_shift,
1115                                                  &v_chroma_shift);
1116
1117                 for (i = 0; i < 3; i++) {
1118                     int src_stride = pic_arg->linesize[i];
1119                     int dst_stride = i ? s->uvlinesize : s->linesize;
1120                     int h_shift = i ? h_chroma_shift : 0;
1121                     int v_shift = i ? v_chroma_shift : 0;
1122                     int w = s->width  >> h_shift;
1123                     int h = s->height >> v_shift;
1124                     uint8_t *src = pic_arg->data[i];
1125                     uint8_t *dst = pic->f->data[i];
1126                     int vpad = 16;
1127
1128                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1129                         && !s->progressive_sequence)
1130                         vpad = 32;
1131
1132                     if (!s->avctx->rc_buffer_size)
1133                         dst += INPLACE_OFFSET;
1134
1135                     if (src_stride == dst_stride)
1136                         memcpy(dst, src, src_stride * h);
1137                     else {
1138                         int h2 = h;
1139                         uint8_t *dst2 = dst;
1140                         while (h2--) {
1141                             memcpy(dst2, src, w);
1142                             dst2 += dst_stride;
1143                             src += src_stride;
1144                         }
1145                     }
1146                     if ((s->width & 15) || (s->height & (vpad-1))) {
1147                         s->dsp.draw_edges(dst, dst_stride,
1148                                           w, h,
1149                                           16>>h_shift,
1150                                           vpad>>v_shift,
1151                                           EDGE_BOTTOM);
1152                     }
1153                 }
1154             }
1155         }
1156         ret = av_frame_copy_props(pic->f, pic_arg);
1157         if (ret < 0)
1158             return ret;
1159
1160         pic->f->display_picture_number = display_picture_number;
1161         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1162     }
1163
1164     /* shift buffer entries */
1165     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1166         s->input_picture[i - 1] = s->input_picture[i];
1167
1168     s->input_picture[encoding_delay] = (Picture*) pic;
1169
1170     return 0;
1171 }
1172
1173 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1174 {
1175     int x, y, plane;
1176     int score = 0;
1177     int64_t score64 = 0;
1178
1179     for (plane = 0; plane < 3; plane++) {
1180         const int stride = p->f->linesize[plane];
1181         const int bw = plane ? 1 : 2;
1182         for (y = 0; y < s->mb_height * bw; y++) {
1183             for (x = 0; x < s->mb_width * bw; x++) {
1184                 int off = p->shared ? 0 : 16;
1185                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1186                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1187                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1188
1189                 switch (FFABS(s->avctx->frame_skip_exp)) {
1190                 case 0: score    =  FFMAX(score, v);          break;
1191                 case 1: score   += FFABS(v);                  break;
1192                 case 2: score64 += v * (int64_t)v;                       break;
1193                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1194                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1195                 }
1196             }
1197         }
1198     }
1199     emms_c();
1200
1201     if (score)
1202         score64 = score;
1203     if (s->avctx->frame_skip_exp < 0)
1204         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1205                       -1.0/s->avctx->frame_skip_exp);
1206
1207     if (score64 < s->avctx->frame_skip_threshold)
1208         return 1;
1209     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1210         return 1;
1211     return 0;
1212 }
1213
1214 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1215 {
1216     AVPacket pkt = { 0 };
1217     int ret, got_output;
1218
1219     av_init_packet(&pkt);
1220     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1221     if (ret < 0)
1222         return ret;
1223
1224     ret = pkt.size;
1225     av_free_packet(&pkt);
1226     return ret;
1227 }
1228
1229 static int estimate_best_b_count(MpegEncContext *s)
1230 {
1231     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1232     AVCodecContext *c = avcodec_alloc_context3(NULL);
1233     const int scale = s->avctx->brd_scale;
1234     int i, j, out_size, p_lambda, b_lambda, lambda2;
1235     int64_t best_rd  = INT64_MAX;
1236     int best_b_count = -1;
1237
1238     av_assert0(scale >= 0 && scale <= 3);
1239
1240     //emms_c();
1241     //s->next_picture_ptr->quality;
1242     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1243     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1244     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1245     if (!b_lambda) // FIXME we should do this somewhere else
1246         b_lambda = p_lambda;
1247     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1248                FF_LAMBDA_SHIFT;
1249
1250     c->width        = s->width  >> scale;
1251     c->height       = s->height >> scale;
1252     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1253     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1254     c->mb_decision  = s->avctx->mb_decision;
1255     c->me_cmp       = s->avctx->me_cmp;
1256     c->mb_cmp       = s->avctx->mb_cmp;
1257     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1258     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1259     c->time_base    = s->avctx->time_base;
1260     c->max_b_frames = s->max_b_frames;
1261
1262     if (avcodec_open2(c, codec, NULL) < 0)
1263         return -1;
1264
1265     for (i = 0; i < s->max_b_frames + 2; i++) {
1266         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1267                                                 s->next_picture_ptr;
1268
1269         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1270             pre_input = *pre_input_ptr;
1271
1272             if (!pre_input.shared && i) {
1273                 pre_input.f->data[0] += INPLACE_OFFSET;
1274                 pre_input.f->data[1] += INPLACE_OFFSET;
1275                 pre_input.f->data[2] += INPLACE_OFFSET;
1276             }
1277
1278             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1279                                  pre_input.f->data[0], pre_input.f->linesize[0],
1280                                  c->width,      c->height);
1281             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1282                                  pre_input.f->data[1], pre_input.f->linesize[1],
1283                                  c->width >> 1, c->height >> 1);
1284             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1285                                  pre_input.f->data[2], pre_input.f->linesize[2],
1286                                  c->width >> 1, c->height >> 1);
1287         }
1288     }
1289
1290     for (j = 0; j < s->max_b_frames + 1; j++) {
1291         int64_t rd = 0;
1292
1293         if (!s->input_picture[j])
1294             break;
1295
1296         c->error[0] = c->error[1] = c->error[2] = 0;
1297
1298         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1299         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1300
1301         out_size = encode_frame(c, s->tmp_frames[0]);
1302
1303         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1304
1305         for (i = 0; i < s->max_b_frames + 1; i++) {
1306             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1307
1308             s->tmp_frames[i + 1]->pict_type = is_p ?
1309                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1310             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1311
1312             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1313
1314             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1315         }
1316
1317         /* get the delayed frames */
1318         while (out_size) {
1319             out_size = encode_frame(c, NULL);
1320             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1321         }
1322
1323         rd += c->error[0] + c->error[1] + c->error[2];
1324
1325         if (rd < best_rd) {
1326             best_rd = rd;
1327             best_b_count = j;
1328         }
1329     }
1330
1331     avcodec_close(c);
1332     av_freep(&c);
1333
1334     return best_b_count;
1335 }
1336
1337 static int select_input_picture(MpegEncContext *s)
1338 {
1339     int i, ret;
1340
1341     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1342         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1343     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1344
1345     /* set next picture type & ordering */
1346     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1347         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1348             if (s->picture_in_gop_number < s->gop_size &&
1349                 s->next_picture_ptr &&
1350                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1351                 // FIXME check that te gop check above is +-1 correct
1352                 av_frame_unref(s->input_picture[0]->f);
1353
1354                 ff_vbv_update(s, 0);
1355
1356                 goto no_output_pic;
1357             }
1358         }
1359
1360         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1361             s->next_picture_ptr == NULL || s->intra_only) {
1362             s->reordered_input_picture[0] = s->input_picture[0];
1363             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1364             s->reordered_input_picture[0]->f->coded_picture_number =
1365                 s->coded_picture_number++;
1366         } else {
1367             int b_frames;
1368
1369             if (s->flags & CODEC_FLAG_PASS2) {
1370                 for (i = 0; i < s->max_b_frames + 1; i++) {
1371                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1372
1373                     if (pict_num >= s->rc_context.num_entries)
1374                         break;
1375                     if (!s->input_picture[i]) {
1376                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1377                         break;
1378                     }
1379
1380                     s->input_picture[i]->f->pict_type =
1381                         s->rc_context.entry[pict_num].new_pict_type;
1382                 }
1383             }
1384
1385             if (s->avctx->b_frame_strategy == 0) {
1386                 b_frames = s->max_b_frames;
1387                 while (b_frames && !s->input_picture[b_frames])
1388                     b_frames--;
1389             } else if (s->avctx->b_frame_strategy == 1) {
1390                 for (i = 1; i < s->max_b_frames + 1; i++) {
1391                     if (s->input_picture[i] &&
1392                         s->input_picture[i]->b_frame_score == 0) {
1393                         s->input_picture[i]->b_frame_score =
1394                             get_intra_count(s,
1395                                             s->input_picture[i    ]->f->data[0],
1396                                             s->input_picture[i - 1]->f->data[0],
1397                                             s->linesize) + 1;
1398                     }
1399                 }
1400                 for (i = 0; i < s->max_b_frames + 1; i++) {
1401                     if (s->input_picture[i] == NULL ||
1402                         s->input_picture[i]->b_frame_score - 1 >
1403                             s->mb_num / s->avctx->b_sensitivity)
1404                         break;
1405                 }
1406
1407                 b_frames = FFMAX(0, i - 1);
1408
1409                 /* reset scores */
1410                 for (i = 0; i < b_frames + 1; i++) {
1411                     s->input_picture[i]->b_frame_score = 0;
1412                 }
1413             } else if (s->avctx->b_frame_strategy == 2) {
1414                 b_frames = estimate_best_b_count(s);
1415             } else {
1416                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1417                 b_frames = 0;
1418             }
1419
1420             emms_c();
1421
1422             for (i = b_frames - 1; i >= 0; i--) {
1423                 int type = s->input_picture[i]->f->pict_type;
1424                 if (type && type != AV_PICTURE_TYPE_B)
1425                     b_frames = i;
1426             }
1427             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1428                 b_frames == s->max_b_frames) {
1429                 av_log(s->avctx, AV_LOG_ERROR,
1430                        "warning, too many b frames in a row\n");
1431             }
1432
1433             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1434                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1435                     s->gop_size > s->picture_in_gop_number) {
1436                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1437                 } else {
1438                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1439                         b_frames = 0;
1440                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1441                 }
1442             }
1443
1444             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1445                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1446                 b_frames--;
1447
1448             s->reordered_input_picture[0] = s->input_picture[b_frames];
1449             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1450                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1451             s->reordered_input_picture[0]->f->coded_picture_number =
1452                 s->coded_picture_number++;
1453             for (i = 0; i < b_frames; i++) {
1454                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1455                 s->reordered_input_picture[i + 1]->f->pict_type =
1456                     AV_PICTURE_TYPE_B;
1457                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1458                     s->coded_picture_number++;
1459             }
1460         }
1461     }
1462 no_output_pic:
1463     if (s->reordered_input_picture[0]) {
1464         s->reordered_input_picture[0]->reference =
1465            s->reordered_input_picture[0]->f->pict_type !=
1466                AV_PICTURE_TYPE_B ? 3 : 0;
1467
1468         ff_mpeg_unref_picture(s, &s->new_picture);
1469         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1470             return ret;
1471
1472         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1473             // input is a shared pix, so we can't modifiy it -> alloc a new
1474             // one & ensure that the shared one is reuseable
1475
1476             Picture *pic;
1477             int i = ff_find_unused_picture(s, 0);
1478             if (i < 0)
1479                 return i;
1480             pic = &s->picture[i];
1481
1482             pic->reference = s->reordered_input_picture[0]->reference;
1483             if (ff_alloc_picture(s, pic, 0) < 0) {
1484                 return -1;
1485             }
1486
1487             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1488             if (ret < 0)
1489                 return ret;
1490
1491             /* mark us unused / free shared pic */
1492             av_frame_unref(s->reordered_input_picture[0]->f);
1493             s->reordered_input_picture[0]->shared = 0;
1494
1495             s->current_picture_ptr = pic;
1496         } else {
1497             // input is not a shared pix -> reuse buffer for current_pix
1498             s->current_picture_ptr = s->reordered_input_picture[0];
1499             for (i = 0; i < 4; i++) {
1500                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1501             }
1502         }
1503         ff_mpeg_unref_picture(s, &s->current_picture);
1504         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1505                                        s->current_picture_ptr)) < 0)
1506             return ret;
1507
1508         s->picture_number = s->new_picture.f->display_picture_number;
1509     } else {
1510         ff_mpeg_unref_picture(s, &s->new_picture);
1511     }
1512     return 0;
1513 }
1514
1515 static void frame_end(MpegEncContext *s)
1516 {
1517     if (s->unrestricted_mv &&
1518         s->current_picture.reference &&
1519         !s->intra_only) {
1520         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1521         int hshift = desc->log2_chroma_w;
1522         int vshift = desc->log2_chroma_h;
1523         s->dsp.draw_edges(s->current_picture.f->data[0], s->current_picture.f->linesize[0],
1524                           s->h_edge_pos, s->v_edge_pos,
1525                           EDGE_WIDTH, EDGE_WIDTH,
1526                           EDGE_TOP | EDGE_BOTTOM);
1527         s->dsp.draw_edges(s->current_picture.f->data[1], s->current_picture.f->linesize[1],
1528                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1529                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1530                           EDGE_TOP | EDGE_BOTTOM);
1531         s->dsp.draw_edges(s->current_picture.f->data[2], s->current_picture.f->linesize[2],
1532                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1533                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1534                           EDGE_TOP | EDGE_BOTTOM);
1535     }
1536
1537     emms_c();
1538
1539     s->last_pict_type                 = s->pict_type;
1540     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1541     if (s->pict_type!= AV_PICTURE_TYPE_B)
1542         s->last_non_b_pict_type = s->pict_type;
1543
1544     s->avctx->coded_frame = s->current_picture_ptr->f;
1545
1546 }
1547
1548 static void update_noise_reduction(MpegEncContext *s)
1549 {
1550     int intra, i;
1551
1552     for (intra = 0; intra < 2; intra++) {
1553         if (s->dct_count[intra] > (1 << 16)) {
1554             for (i = 0; i < 64; i++) {
1555                 s->dct_error_sum[intra][i] >>= 1;
1556             }
1557             s->dct_count[intra] >>= 1;
1558         }
1559
1560         for (i = 0; i < 64; i++) {
1561             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1562                                        s->dct_count[intra] +
1563                                        s->dct_error_sum[intra][i] / 2) /
1564                                       (s->dct_error_sum[intra][i] + 1);
1565         }
1566     }
1567 }
1568
1569 static int frame_start(MpegEncContext *s)
1570 {
1571     int ret;
1572
1573     /* mark & release old frames */
1574     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1575         s->last_picture_ptr != s->next_picture_ptr &&
1576         s->last_picture_ptr->f->buf[0]) {
1577         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1578     }
1579
1580     s->current_picture_ptr->f->pict_type = s->pict_type;
1581     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1582
1583     ff_mpeg_unref_picture(s, &s->current_picture);
1584     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1585                                    s->current_picture_ptr)) < 0)
1586         return ret;
1587
1588     if (s->pict_type != AV_PICTURE_TYPE_B) {
1589         s->last_picture_ptr = s->next_picture_ptr;
1590         if (!s->droppable)
1591             s->next_picture_ptr = s->current_picture_ptr;
1592     }
1593
1594     if (s->last_picture_ptr) {
1595         ff_mpeg_unref_picture(s, &s->last_picture);
1596         if (s->last_picture_ptr->f->buf[0] &&
1597             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1598                                        s->last_picture_ptr)) < 0)
1599             return ret;
1600     }
1601     if (s->next_picture_ptr) {
1602         ff_mpeg_unref_picture(s, &s->next_picture);
1603         if (s->next_picture_ptr->f->buf[0] &&
1604             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1605                                        s->next_picture_ptr)) < 0)
1606             return ret;
1607     }
1608
1609     if (s->picture_structure!= PICT_FRAME) {
1610         int i;
1611         for (i = 0; i < 4; i++) {
1612             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1613                 s->current_picture.f->data[i] +=
1614                     s->current_picture.f->linesize[i];
1615             }
1616             s->current_picture.f->linesize[i] *= 2;
1617             s->last_picture.f->linesize[i]    *= 2;
1618             s->next_picture.f->linesize[i]    *= 2;
1619         }
1620     }
1621
1622     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1623         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1624         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1625     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1626         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1627         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1628     } else {
1629         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1630         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1631     }
1632
1633     if (s->dct_error_sum) {
1634         av_assert2(s->avctx->noise_reduction && s->encoding);
1635         update_noise_reduction(s);
1636     }
1637
1638     return 0;
1639 }
1640
1641 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1642                           const AVFrame *pic_arg, int *got_packet)
1643 {
1644     MpegEncContext *s = avctx->priv_data;
1645     int i, stuffing_count, ret;
1646     int context_count = s->slice_context_count;
1647
1648     s->picture_in_gop_number++;
1649
1650     if (load_input_picture(s, pic_arg) < 0)
1651         return -1;
1652
1653     if (select_input_picture(s) < 0) {
1654         return -1;
1655     }
1656
1657     /* output? */
1658     if (s->new_picture.f->data[0]) {
1659         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1660             return ret;
1661         if (s->mb_info) {
1662             s->mb_info_ptr = av_packet_new_side_data(pkt,
1663                                  AV_PKT_DATA_H263_MB_INFO,
1664                                  s->mb_width*s->mb_height*12);
1665             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1666         }
1667
1668         for (i = 0; i < context_count; i++) {
1669             int start_y = s->thread_context[i]->start_mb_y;
1670             int   end_y = s->thread_context[i]->  end_mb_y;
1671             int h       = s->mb_height;
1672             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1673             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1674
1675             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1676         }
1677
1678         s->pict_type = s->new_picture.f->pict_type;
1679         //emms_c();
1680         ret = frame_start(s);
1681         if (ret < 0)
1682             return ret;
1683 vbv_retry:
1684         if (encode_picture(s, s->picture_number) < 0)
1685             return -1;
1686
1687         avctx->header_bits = s->header_bits;
1688         avctx->mv_bits     = s->mv_bits;
1689         avctx->misc_bits   = s->misc_bits;
1690         avctx->i_tex_bits  = s->i_tex_bits;
1691         avctx->p_tex_bits  = s->p_tex_bits;
1692         avctx->i_count     = s->i_count;
1693         // FIXME f/b_count in avctx
1694         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1695         avctx->skip_count  = s->skip_count;
1696
1697         frame_end(s);
1698
1699         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1700             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1701
1702         if (avctx->rc_buffer_size) {
1703             RateControlContext *rcc = &s->rc_context;
1704             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1705
1706             if (put_bits_count(&s->pb) > max_size &&
1707                 s->lambda < s->avctx->lmax) {
1708                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1709                                        (s->qscale + 1) / s->qscale);
1710                 if (s->adaptive_quant) {
1711                     int i;
1712                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1713                         s->lambda_table[i] =
1714                             FFMAX(s->lambda_table[i] + 1,
1715                                   s->lambda_table[i] * (s->qscale + 1) /
1716                                   s->qscale);
1717                 }
1718                 s->mb_skipped = 0;        // done in frame_start()
1719                 // done in encode_picture() so we must undo it
1720                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1721                     if (s->flipflop_rounding          ||
1722                         s->codec_id == AV_CODEC_ID_H263P ||
1723                         s->codec_id == AV_CODEC_ID_MPEG4)
1724                         s->no_rounding ^= 1;
1725                 }
1726                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1727                     s->time_base       = s->last_time_base;
1728                     s->last_non_b_time = s->time - s->pp_time;
1729                 }
1730                 for (i = 0; i < context_count; i++) {
1731                     PutBitContext *pb = &s->thread_context[i]->pb;
1732                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1733                 }
1734                 goto vbv_retry;
1735             }
1736
1737             av_assert0(s->avctx->rc_max_rate);
1738         }
1739
1740         if (s->flags & CODEC_FLAG_PASS1)
1741             ff_write_pass1_stats(s);
1742
1743         for (i = 0; i < 4; i++) {
1744             s->current_picture_ptr->f->error[i] =
1745             s->current_picture.f->error[i] =
1746                 s->current_picture.error[i];
1747             avctx->error[i] += s->current_picture_ptr->f->error[i];
1748         }
1749
1750         if (s->flags & CODEC_FLAG_PASS1)
1751             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1752                    avctx->i_tex_bits + avctx->p_tex_bits ==
1753                        put_bits_count(&s->pb));
1754         flush_put_bits(&s->pb);
1755         s->frame_bits  = put_bits_count(&s->pb);
1756
1757         stuffing_count = ff_vbv_update(s, s->frame_bits);
1758         s->stuffing_bits = 8*stuffing_count;
1759         if (stuffing_count) {
1760             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1761                     stuffing_count + 50) {
1762                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1763                 return -1;
1764             }
1765
1766             switch (s->codec_id) {
1767             case AV_CODEC_ID_MPEG1VIDEO:
1768             case AV_CODEC_ID_MPEG2VIDEO:
1769                 while (stuffing_count--) {
1770                     put_bits(&s->pb, 8, 0);
1771                 }
1772             break;
1773             case AV_CODEC_ID_MPEG4:
1774                 put_bits(&s->pb, 16, 0);
1775                 put_bits(&s->pb, 16, 0x1C3);
1776                 stuffing_count -= 4;
1777                 while (stuffing_count--) {
1778                     put_bits(&s->pb, 8, 0xFF);
1779                 }
1780             break;
1781             default:
1782                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1783             }
1784             flush_put_bits(&s->pb);
1785             s->frame_bits  = put_bits_count(&s->pb);
1786         }
1787
1788         /* update mpeg1/2 vbv_delay for CBR */
1789         if (s->avctx->rc_max_rate                          &&
1790             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1791             s->out_format == FMT_MPEG1                     &&
1792             90000LL * (avctx->rc_buffer_size - 1) <=
1793                 s->avctx->rc_max_rate * 0xFFFFLL) {
1794             int vbv_delay, min_delay;
1795             double inbits  = s->avctx->rc_max_rate *
1796                              av_q2d(s->avctx->time_base);
1797             int    minbits = s->frame_bits - 8 *
1798                              (s->vbv_delay_ptr - s->pb.buf - 1);
1799             double bits    = s->rc_context.buffer_index + minbits - inbits;
1800
1801             if (bits < 0)
1802                 av_log(s->avctx, AV_LOG_ERROR,
1803                        "Internal error, negative bits\n");
1804
1805             assert(s->repeat_first_field == 0);
1806
1807             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1808             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1809                         s->avctx->rc_max_rate;
1810
1811             vbv_delay = FFMAX(vbv_delay, min_delay);
1812
1813             av_assert0(vbv_delay < 0xFFFF);
1814
1815             s->vbv_delay_ptr[0] &= 0xF8;
1816             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1817             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1818             s->vbv_delay_ptr[2] &= 0x07;
1819             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1820             avctx->vbv_delay     = vbv_delay * 300;
1821         }
1822         s->total_bits     += s->frame_bits;
1823         avctx->frame_bits  = s->frame_bits;
1824
1825         pkt->pts = s->current_picture.f->pts;
1826         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1827             if (!s->current_picture.f->coded_picture_number)
1828                 pkt->dts = pkt->pts - s->dts_delta;
1829             else
1830                 pkt->dts = s->reordered_pts;
1831             s->reordered_pts = pkt->pts;
1832         } else
1833             pkt->dts = pkt->pts;
1834         if (s->current_picture.f->key_frame)
1835             pkt->flags |= AV_PKT_FLAG_KEY;
1836         if (s->mb_info)
1837             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1838     } else {
1839         s->frame_bits = 0;
1840     }
1841
1842     /* release non-reference frames */
1843     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1844         if (!s->picture[i].reference)
1845             ff_mpeg_unref_picture(s, &s->picture[i]);
1846     }
1847
1848     av_assert1((s->frame_bits & 7) == 0);
1849
1850     pkt->size = s->frame_bits / 8;
1851     *got_packet = !!pkt->size;
1852     return 0;
1853 }
1854
1855 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1856                                                 int n, int threshold)
1857 {
1858     static const char tab[64] = {
1859         3, 2, 2, 1, 1, 1, 1, 1,
1860         1, 1, 1, 1, 1, 1, 1, 1,
1861         1, 1, 1, 1, 1, 1, 1, 1,
1862         0, 0, 0, 0, 0, 0, 0, 0,
1863         0, 0, 0, 0, 0, 0, 0, 0,
1864         0, 0, 0, 0, 0, 0, 0, 0,
1865         0, 0, 0, 0, 0, 0, 0, 0,
1866         0, 0, 0, 0, 0, 0, 0, 0
1867     };
1868     int score = 0;
1869     int run = 0;
1870     int i;
1871     int16_t *block = s->block[n];
1872     const int last_index = s->block_last_index[n];
1873     int skip_dc;
1874
1875     if (threshold < 0) {
1876         skip_dc = 0;
1877         threshold = -threshold;
1878     } else
1879         skip_dc = 1;
1880
1881     /* Are all we could set to zero already zero? */
1882     if (last_index <= skip_dc - 1)
1883         return;
1884
1885     for (i = 0; i <= last_index; i++) {
1886         const int j = s->intra_scantable.permutated[i];
1887         const int level = FFABS(block[j]);
1888         if (level == 1) {
1889             if (skip_dc && i == 0)
1890                 continue;
1891             score += tab[run];
1892             run = 0;
1893         } else if (level > 1) {
1894             return;
1895         } else {
1896             run++;
1897         }
1898     }
1899     if (score >= threshold)
1900         return;
1901     for (i = skip_dc; i <= last_index; i++) {
1902         const int j = s->intra_scantable.permutated[i];
1903         block[j] = 0;
1904     }
1905     if (block[0])
1906         s->block_last_index[n] = 0;
1907     else
1908         s->block_last_index[n] = -1;
1909 }
1910
1911 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1912                                int last_index)
1913 {
1914     int i;
1915     const int maxlevel = s->max_qcoeff;
1916     const int minlevel = s->min_qcoeff;
1917     int overflow = 0;
1918
1919     if (s->mb_intra) {
1920         i = 1; // skip clipping of intra dc
1921     } else
1922         i = 0;
1923
1924     for (; i <= last_index; i++) {
1925         const int j = s->intra_scantable.permutated[i];
1926         int level = block[j];
1927
1928         if (level > maxlevel) {
1929             level = maxlevel;
1930             overflow++;
1931         } else if (level < minlevel) {
1932             level = minlevel;
1933             overflow++;
1934         }
1935
1936         block[j] = level;
1937     }
1938
1939     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1940         av_log(s->avctx, AV_LOG_INFO,
1941                "warning, clipping %d dct coefficients to %d..%d\n",
1942                overflow, minlevel, maxlevel);
1943 }
1944
/**
 * Compute a weight for each pixel of an 8x8 block from the local variation
 * of its neighbourhood.
 *
 * For every pixel the 3x3 window around it, cropped to the 8x8 block, is
 * scanned; the weight is 36 * sqrt(count * sum(v^2) - sum(v)^2) / count,
 * where count is the number of pixels in the cropped window.
 *
 * @param weight output, 64 entries stored row by row (index x + 8 * y)
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int sum   = 0;
            int sqr   = 0;
            int count = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    int v = ptr[nx + ny * stride];

                    sum += v;
                    sqr += v * v;
                    count++;
                }
            }

            weight[col + 8 * row] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
1968
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * Steps, in order: pick the quantizer (adaptive quantization / QP_RD),
 * locate the source pixels in s->new_picture (through an edge-emulated copy
 * when the macroblock crosses the picture border), fill s->block either
 * with get_pixels() (intra) or with diff_pixels() against the
 * motion-compensated data in s->dest (non-intra), run dct_quantize() on each
 * non-skipped block with optional refinement and coefficient elimination,
 * and finally call the macroblock writer selected by s->codec_id.
 *
 * @param s               encoder context
 * @param motion_x        forwarded to the codec specific *_encode_mb() call
 * @param motion_y        forwarded to the codec specific *_encode_mb() call
 * @param mb_block_height chroma rows per macroblock, used to address the
 *                        chroma planes
 * @param mb_block_width  chroma columns per macroblock, used to address the
 *                        chroma planes
 * @param mb_block_count  number of blocks in s->block processed for this
 *                        macroblock
 */
1969 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1970                                                 int motion_x, int motion_y,
1971                                                 int mb_block_height,
1972                                                 int mb_block_width,
1973                                                 int mb_block_count)
1974 {
1975     int16_t weight[12][64];
1976     int16_t orig[12][64];
1977     const int mb_x = s->mb_x;
1978     const int mb_y = s->mb_y;
1979     int i;
1980     int skip_dct[12];
1981     int dct_offset = s->linesize * 8; // default for progressive frames
1982     int uv_dct_offset = s->uvlinesize * 8;
1983     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1984     ptrdiff_t wrap_y, wrap_c;
1985     /* every block starts from the global s->skipdct setting */
1986     for (i = 0; i < mb_block_count; i++)
1987         skip_dct[i] = s->skipdct;
1988     /* adaptive quantization: take lambda from the per-MB table, then settle qscale/dquant */
1989     if (s->adaptive_quant) {
1990         const int last_qp = s->qscale;
1991         const int mb_xy = mb_x + mb_y * s->mb_stride;
1992
1993         s->lambda = s->lambda_table[mb_xy];
1994         update_qscale(s);
1995
1996         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1997             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1998             s->dquant = s->qscale - last_qp;
1999
2000             if (s->out_format == FMT_H263) {
2001                 s->dquant = av_clip(s->dquant, -2, 2);
2002
2003                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2004                     if (!s->mb_intra) {
2005                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2006                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2007                                 s->dquant = 0;
2008                         }
2009                         if (s->mv_type == MV_TYPE_8X8)
2010                             s->dquant = 0;
2011                     }
2012                 }
2013             }
2014         }
2015         ff_set_qscale(s, last_qp + s->dquant);
2016     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2017         ff_set_qscale(s, s->qscale + s->dquant);
2018     /* source pointers for this macroblock inside s->new_picture */
2019     wrap_y = s->linesize;
2020     wrap_c = s->uvlinesize;
2021     ptr_y  = s->new_picture.f->data[0] +
2022              (mb_y * 16 * wrap_y)              + mb_x * 16;
2023     ptr_cb = s->new_picture.f->data[1] +
2024              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2025     ptr_cr = s->new_picture.f->data[2] +
2026              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2027     /* MB extends past width/height (non-AMV): read through copies in edge_emu_buffer */
2028     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2029         uint8_t *ebuf = s->edge_emu_buffer + 36 * wrap_y;
2030         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2031         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2032         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2033                                  wrap_y, wrap_y,
2034                                  16, 16, mb_x * 16, mb_y * 16,
2035                                  s->width, s->height);
2036         ptr_y = ebuf;
2037         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2038                                  wrap_c, wrap_c,
2039                                  mb_block_width, mb_block_height,
2040                                  mb_x * mb_block_width, mb_y * mb_block_height,
2041                                  cw, ch);
2042         ptr_cb = ebuf + 16 * wrap_y;
2043         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2044                                  wrap_c, wrap_c,
2045                                  mb_block_width, mb_block_height,
2046                                  mb_x * mb_block_width, mb_y * mb_block_height,
2047                                  cw, ch);
2048         ptr_cr = ebuf + 16 * wrap_y + 16;
2049     }
2050     /* intra: get_pixels() on the source; otherwise motion compensate into s->dest and diff_pixels() */
2051     if (s->mb_intra) {
2052         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2053             int progressive_score, interlaced_score;
2054
2055             s->interlaced_dct = 0;
2056             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
2057                                                     NULL, wrap_y, 8) +
2058                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2059                                                     NULL, wrap_y, 8) - 400;
2060
2061             if (progressive_score > 0) {
2062                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
2063                                                        NULL, wrap_y * 2, 8) +
2064                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
2065                                                        NULL, wrap_y * 2, 8);
2066                 if (progressive_score > interlaced_score) {
2067                     s->interlaced_dct = 1;
2068                     /* interlaced DCT: lower blocks start one line down, rows are two lines apart */
2069                     dct_offset = wrap_y;
2070                     uv_dct_offset = wrap_c;
2071                     wrap_y <<= 1;
2072                     if (s->chroma_format == CHROMA_422 ||
2073                         s->chroma_format == CHROMA_444)
2074                         wrap_c <<= 1;
2075                 }
2076             }
2077         }
2078         /* blocks 0-3 come from the ptr_y plane */
2079         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
2080         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
2081         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
2082         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
2083
2084         if (s->flags & CODEC_FLAG_GRAY) {
2085             skip_dct[4] = 1;
2086             skip_dct[5] = 1;
2087         } else {
2088             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2089             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2090             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2091                 s->dsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2092                 s->dsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2093             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2094                 s->dsp.get_pixels(s->block[6], ptr_cb + 8, wrap_c);
2095                 s->dsp.get_pixels(s->block[7], ptr_cr + 8, wrap_c);
2096                 s->dsp.get_pixels(s->block[8], ptr_cb + uv_dct_offset, wrap_c);
2097                 s->dsp.get_pixels(s->block[9], ptr_cr + uv_dct_offset, wrap_c);
2098                 s->dsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2099                 s->dsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2100             }
2101         }
2102     } else {
2103         op_pixels_func (*op_pix)[4];
2104         qpel_mc_func (*op_qpix)[16];
2105         uint8_t *dest_y, *dest_cb, *dest_cr;
2106
2107         dest_y  = s->dest[0];
2108         dest_cb = s->dest[1];
2109         dest_cr = s->dest[2];
2110         /* put_* tables for the first prediction; no_rnd variants when no_rounding is set on a non-B picture */
2111         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2112             op_pix  = s->hdsp.put_pixels_tab;
2113             op_qpix = s->qdsp.put_qpel_pixels_tab;
2114         } else {
2115             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2116             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2117         }
2118         /* after a forward prediction the avg_* tables are used for a following backward one */
2119         if (s->mv_dir & MV_DIR_FORWARD) {
2120             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
2121                           s->last_picture.f->data,
2122                           op_pix, op_qpix);
2123             op_pix  = s->hdsp.avg_pixels_tab;
2124             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2125         }
2126         if (s->mv_dir & MV_DIR_BACKWARD) {
2127             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
2128                           s->next_picture.f->data,
2129                           op_pix, op_qpix);
2130         }
2131         /* choose progressive or interlaced DCT by comparing ildct_cmp scores of dest vs. source */
2132         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2133             int progressive_score, interlaced_score;
2134
2135             s->interlaced_dct = 0;
2136             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
2137                                                     ptr_y,              wrap_y,
2138                                                     8) +
2139                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
2140                                                     ptr_y + wrap_y * 8, wrap_y,
2141                                                     8) - 400;
2142
2143             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2144                 progressive_score -= 400;
2145
2146             if (progressive_score > 0) {
2147                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
2148                                                        ptr_y,
2149                                                        wrap_y * 2, 8) +
2150                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
2151                                                        ptr_y + wrap_y,
2152                                                        wrap_y * 2, 8);
2153
2154                 if (progressive_score > interlaced_score) {
2155                     s->interlaced_dct = 1;
2156
2157                     dct_offset = wrap_y;
2158                     uv_dct_offset = wrap_c;
2159                     wrap_y <<= 1;
2160                     if (s->chroma_format == CHROMA_422)
2161                         wrap_c <<= 1;
2162                 }
2163             }
2164         }
2165         /* blocks 0-3: diff_pixels() of source (ptr_y) against s->dest[0] */
2166         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2167         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2168         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2169                            dest_y + dct_offset, wrap_y);
2170         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2171                            dest_y + dct_offset + 8, wrap_y);
2172
2173         if (s->flags & CODEC_FLAG_GRAY) {
2174             skip_dct[4] = 1;
2175             skip_dct[5] = 1;
2176         } else {
2177             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2178             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2179             if (!s->chroma_y_shift) { /* 422 */
2180                 s->dsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2181                                    dest_cb + uv_dct_offset, wrap_c);
2182                 s->dsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2183                                    dest_cr + uv_dct_offset, wrap_c);
2184             }
2185         }
2186         /* pre quantization */
2187         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2188                 2 * s->qscale * s->qscale) {
2189             // FIXME optimize
2190             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2191                               wrap_y, 8) < 20 * s->qscale)
2192                 skip_dct[0] = 1;
2193             if (s->dsp.sad[1](NULL, ptr_y + 8,
2194                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2195                 skip_dct[1] = 1;
2196             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2197                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2198                 skip_dct[2] = 1;
2199             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2200                               dest_y + dct_offset + 8,
2201                               wrap_y, 8) < 20 * s->qscale)
2202                 skip_dct[3] = 1;
2203             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2204                               wrap_c, 8) < 20 * s->qscale)
2205                 skip_dct[4] = 1;
2206             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2207                               wrap_c, 8) < 20 * s->qscale)
2208                 skip_dct[5] = 1;
2209             if (!s->chroma_y_shift) { /* 422 */
2210                 if (s->dsp.sad[1](NULL, ptr_cb + uv_dct_offset,
2211                                   dest_cb + uv_dct_offset,
2212                                   wrap_c, 8) < 20 * s->qscale)
2213                     skip_dct[6] = 1;
2214                 if (s->dsp.sad[1](NULL, ptr_cr + uv_dct_offset,
2215                                   dest_cr + uv_dct_offset,
2216                                   wrap_c, 8) < 20 * s->qscale)
2217                     skip_dct[7] = 1;
2218             }
2219         }
2220     }
2221     /* noise shaping: weights for non-skipped blocks plus a copy of the pre-DCT block data */
2222     if (s->quantizer_noise_shaping) {
2223         if (!skip_dct[0])
2224             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2225         if (!skip_dct[1])
2226             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2227         if (!skip_dct[2])
2228             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2229         if (!skip_dct[3])
2230             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2231         if (!skip_dct[4])
2232             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2233         if (!skip_dct[5])
2234             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2235         if (!s->chroma_y_shift) { /* 422 */
2236             if (!skip_dct[6])
2237                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2238                                   wrap_c);
2239             if (!skip_dct[7])
2240                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2241                                   wrap_c);
2242         }
2243         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2244     }
2245
2246     /* DCT & quantize */
2247     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2248     {
2249         for (i = 0; i < mb_block_count; i++) {
2250             if (!skip_dct[i]) {
2251                 int overflow;
2252                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2253                 // FIXME we could decide to change to quantizer instead of
2254                 // clipping
2255                 // JS: I don't think that would be a good idea it could lower
2256                 //     quality instead of improve it. Just INTRADC clipping
2257                 //     deserves changes in quantizer
2258                 if (overflow)
2259                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2260             } else
2261                 s->block_last_index[i] = -1;
2262         }
2263         if (s->quantizer_noise_shaping) {
2264             for (i = 0; i < mb_block_count; i++) {
2265                 if (!skip_dct[i]) {
2266                     s->block_last_index[i] =
2267                         dct_quantize_refine(s, s->block[i], weight[i],
2268                                             orig[i], i, s->qscale);
2269                 }
2270             }
2271         }
2272         /* single coefficient elimination is applied to non-intra macroblocks only */
2273         if (s->luma_elim_threshold && !s->mb_intra)
2274             for (i = 0; i < 4; i++)
2275                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2276         if (s->chroma_elim_threshold && !s->mb_intra)
2277             for (i = 4; i < mb_block_count; i++)
2278                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2279         /* CBP_RD: give blocks without coefficients a fixed coded_score */
2280         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2281             for (i = 0; i < mb_block_count; i++) {
2282                 if (s->block_last_index[i] == -1)
2283                     s->coded_score[i] = INT_MAX / 256;
2284             }
2285         }
2286     }
2287     /* gray intra MB: blocks 4 and up carry only one fixed DC coefficient */
2288     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2289         s->block_last_index[4] =
2290         s->block_last_index[5] = 0;
2291         s->block[4][0] =
2292         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2293         if (!s->chroma_y_shift) { /* 422 / 444 */
2294             for (i=6; i<12; i++) {
2295                 s->block_last_index[i] = 0;
2296                 s->block[i][0] = s->block[4][0];
2297             }
2298         }
2299     }
2300
2301     // non c quantize code returns incorrect block_last_index FIXME
2302     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2303         for (i = 0; i < mb_block_count; i++) {
2304             int j;
2305             if (s->block_last_index[i] > 0) {
2306                 for (j = 63; j > 0; j--) { /* search backwards for the last non-zero coefficient */
2307                     if (s->block[i][s->intra_scantable.permutated[j]])
2308                         break;
2309                 }
2310                 s->block_last_index[i] = j;
2311             }
2312         }
2313     }
2314
2315     /* huffman encode */
2316     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2317     case AV_CODEC_ID_MPEG1VIDEO:
2318     case AV_CODEC_ID_MPEG2VIDEO:
2319         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2320             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2321         break;
2322     case AV_CODEC_ID_MPEG4:
2323         if (CONFIG_MPEG4_ENCODER)
2324             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2325         break;
2326     case AV_CODEC_ID_MSMPEG4V2:
2327     case AV_CODEC_ID_MSMPEG4V3:
2328     case AV_CODEC_ID_WMV1:
2329         if (CONFIG_MSMPEG4_ENCODER)
2330             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2331         break;
2332     case AV_CODEC_ID_WMV2:
2333         if (CONFIG_WMV2_ENCODER)
2334             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2335         break;
2336     case AV_CODEC_ID_H261:
2337         if (CONFIG_H261_ENCODER)
2338             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2339         break;
2340     case AV_CODEC_ID_H263:
2341     case AV_CODEC_ID_H263P:
2342     case AV_CODEC_ID_FLV1:
2343     case AV_CODEC_ID_RV10:
2344     case AV_CODEC_ID_RV20:
2345         if (CONFIG_H263_ENCODER)
2346             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2347         break;
2348     case AV_CODEC_ID_MJPEG:
2349     case AV_CODEC_ID_AMV:
2350         if (CONFIG_MJPEG_ENCODER)
2351             ff_mjpeg_encode_mb(s, s->block);
2352         break;
2353     default:
2354         av_assert1(0);
2355     }
2356 }
2357
2358 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2359 {
2360     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2361     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2362     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2363 }
2364
2365 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2366     int i;
2367
2368     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2369
2370     /* mpeg1 */
2371     d->mb_skip_run= s->mb_skip_run;
2372     for(i=0; i<3; i++)
2373         d->last_dc[i] = s->last_dc[i];
2374
2375     /* statistics */
2376     d->mv_bits= s->mv_bits;
2377     d->i_tex_bits= s->i_tex_bits;
2378     d->p_tex_bits= s->p_tex_bits;
2379     d->i_count= s->i_count;
2380     d->f_count= s->f_count;
2381     d->b_count= s->b_count;
2382     d->skip_count= s->skip_count;
2383     d->misc_bits= s->misc_bits;
2384     d->last_bits= 0;
2385
2386     d->mb_skipped= 0;
2387     d->qscale= s->qscale;
2388     d->dquant= s->dquant;
2389
2390     d->esc3_level_length= s->esc3_level_length;
2391 }
2392
2393 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2394     int i;
2395
2396     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2397     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2398
2399     /* mpeg1 */
2400     d->mb_skip_run= s->mb_skip_run;
2401     for(i=0; i<3; i++)
2402         d->last_dc[i] = s->last_dc[i];
2403
2404     /* statistics */
2405     d->mv_bits= s->mv_bits;
2406     d->i_tex_bits= s->i_tex_bits;
2407     d->p_tex_bits= s->p_tex_bits;
2408     d->i_count= s->i_count;
2409     d->f_count= s->f_count;
2410     d->b_count= s->b_count;
2411     d->skip_count= s->skip_count;
2412     d->misc_bits= s->misc_bits;
2413
2414     d->mb_intra= s->mb_intra;
2415     d->mb_skipped= s->mb_skipped;
2416     d->mv_type= s->mv_type;
2417     d->mv_dir= s->mv_dir;
2418     d->pb= s->pb;
2419     if(s->data_partitioning){
2420         d->pb2= s->pb2;
2421         d->tex_pb= s->tex_pb;
2422     }
2423     d->block= s->block;
2424     for(i=0; i<8; i++)
2425         d->block_last_index[i]= s->block_last_index[i];
2426     d->interlaced_dct= s->interlaced_dct;
2427     d->qscale= s->qscale;
2428
2429     d->esc3_level_length= s->esc3_level_length;
2430 }
2431
2432 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2433                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2434                            int *dmin, int *next_block, int motion_x, int motion_y)
2435 {
2436     int score;
2437     uint8_t *dest_backup[3];
2438
2439     copy_context_before_encode(s, backup, type);
2440
2441     s->block= s->blocks[*next_block];
2442     s->pb= pb[*next_block];
2443     if(s->data_partitioning){
2444         s->pb2   = pb2   [*next_block];
2445         s->tex_pb= tex_pb[*next_block];
2446     }
2447
2448     if(*next_block){
2449         memcpy(dest_backup, s->dest, sizeof(s->dest));
2450         s->dest[0] = s->rd_scratchpad;
2451         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2452         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2453         av_assert0(s->linesize >= 32); //FIXME
2454     }
2455
2456     encode_mb(s, motion_x, motion_y);
2457
2458     score= put_bits_count(&s->pb);
2459     if(s->data_partitioning){
2460         score+= put_bits_count(&s->pb2);
2461         score+= put_bits_count(&s->tex_pb);
2462     }
2463
2464     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2465         ff_MPV_decode_mb(s, s->block);
2466
2467         score *= s->lambda2;
2468         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2469     }
2470
2471     if(*next_block){
2472         memcpy(s->dest, dest_backup, sizeof(s->dest));
2473     }
2474
2475     if(score<*dmin){
2476         *dmin= score;
2477         *next_block^=1;
2478
2479         copy_context_after_encode(best, s, type);
2480     }
2481 }
2482
2483 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2484     uint32_t *sq = ff_square_tab + 256;
2485     int acc=0;
2486     int x,y;
2487
2488     if(w==16 && h==16)
2489         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2490     else if(w==8 && h==8)
2491         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2492
2493     for(y=0; y<h; y++){
2494         for(x=0; x<w; x++){
2495             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2496         }
2497     }
2498
2499     av_assert2(acc>=0);
2500
2501     return acc;
2502 }
2503
2504 static int sse_mb(MpegEncContext *s){
2505     int w= 16;
2506     int h= 16;
2507
2508     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2509     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2510
2511     if(w==16 && h==16)
2512       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2513         return  s->dsp.nsse[0](s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2514                +s->dsp.nsse[1](s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2515                +s->dsp.nsse[1](s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2516       }else{
2517         return  s->dsp.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2518                +s->dsp.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2519                +s->dsp.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2520       }
2521     else
2522         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2523                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2524                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2525 }
2526
2527 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2528     MpegEncContext *s= *(void**)arg;
2529
2530
2531     s->me.pre_pass=1;
2532     s->me.dia_size= s->avctx->pre_dia_size;
2533     s->first_slice_line=1;
2534     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2535         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2536             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2537         }
2538         s->first_slice_line=0;
2539     }
2540
2541     s->me.pre_pass=0;
2542
2543     return 0;
2544 }
2545
2546 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2547     MpegEncContext *s= *(void**)arg;
2548
2549     ff_check_alignment();
2550
2551     s->me.dia_size= s->avctx->dia_size;
2552     s->first_slice_line=1;
2553     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2554         s->mb_x=0; //for block init below
2555         ff_init_block_index(s);
2556         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2557             s->block_index[0]+=2;
2558             s->block_index[1]+=2;
2559             s->block_index[2]+=2;
2560             s->block_index[3]+=2;
2561
2562             /* compute motion vector & mb_type and store in context */
2563             if(s->pict_type==AV_PICTURE_TYPE_B)
2564                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2565             else
2566                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2567         }
2568         s->first_slice_line=0;
2569     }
2570     return 0;
2571 }
2572
2573 static int mb_var_thread(AVCodecContext *c, void *arg){
2574     MpegEncContext *s= *(void**)arg;
2575     int mb_x, mb_y;
2576
2577     ff_check_alignment();
2578
2579     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2580         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2581             int xx = mb_x * 16;
2582             int yy = mb_y * 16;
2583             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2584             int varc;
2585             int sum = s->dsp.pix_sum(pix, s->linesize);
2586
2587             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2588
2589             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2590             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2591             s->me.mb_var_sum_temp    += varc;
2592         }
2593     }
2594     return 0;
2595 }
2596
2597 static void write_slice_end(MpegEncContext *s){
2598     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2599         if(s->partitioned_frame){
2600             ff_mpeg4_merge_partitions(s);
2601         }
2602
2603         ff_mpeg4_stuffing(&s->pb);
2604     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2605         ff_mjpeg_encode_stuffing(s);
2606     }
2607
2608     avpriv_align_put_bits(&s->pb);
2609     flush_put_bits(&s->pb);
2610
2611     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2612         s->misc_bits+= get_bits_diff(s);
2613 }
2614
2615 static void write_mb_info(MpegEncContext *s)
2616 {
2617     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2618     int offset = put_bits_count(&s->pb);
2619     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2620     int gobn = s->mb_y / s->gob_index;
2621     int pred_x, pred_y;
2622     if (CONFIG_H263_ENCODER)
2623         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2624     bytestream_put_le32(&ptr, offset);
2625     bytestream_put_byte(&ptr, s->qscale);
2626     bytestream_put_byte(&ptr, gobn);
2627     bytestream_put_le16(&ptr, mba);
2628     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2629     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2630     /* 4MV not implemented */
2631     bytestream_put_byte(&ptr, 0); /* hmv2 */
2632     bytestream_put_byte(&ptr, 0); /* vmv2 */
2633 }
2634
2635 static void update_mb_info(MpegEncContext *s, int startcode)
2636 {
2637     if (!s->mb_info)
2638         return;
2639     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2640         s->mb_info_size += 12;
2641         s->prev_mb_info = s->last_mb_info;
2642     }
2643     if (startcode) {
2644         s->prev_mb_info = put_bits_count(&s->pb)/8;
2645         /* This might have incremented mb_info_size above, and we return without
2646          * actually writing any info into that slot yet. But in that case,
2647          * this will be called again at the start of the after writing the
2648          * start code, actually writing the mb info. */
2649         return;
2650     }
2651
2652     s->last_mb_info = put_bits_count(&s->pb)/8;
2653     if (!s->mb_info_size)
2654         s->mb_info_size += 12;
2655     write_mb_info(s);
2656 }
2657
2658 static int encode_thread(AVCodecContext *c, void *arg){
2659     MpegEncContext *s= *(void**)arg;
2660     int mb_x, mb_y, pdif = 0;
2661     int chr_h= 16>>s->chroma_y_shift;
2662     int i, j;
2663     MpegEncContext best_s, backup_s;
2664     uint8_t bit_buf[2][MAX_MB_BYTES];
2665     uint8_t bit_buf2[2][MAX_MB_BYTES];
2666     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2667     PutBitContext pb[2], pb2[2], tex_pb[2];
2668
2669     ff_check_alignment();
2670
2671     for(i=0; i<2; i++){
2672         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2673         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2674         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2675     }
2676
2677     s->last_bits= put_bits_count(&s->pb);
2678     s->mv_bits=0;
2679     s->misc_bits=0;
2680     s->i_tex_bits=0;
2681     s->p_tex_bits=0;
2682     s->i_count=0;
2683     s->f_count=0;
2684     s->b_count=0;
2685     s->skip_count=0;
2686
2687     for(i=0; i<3; i++){
2688         /* init last dc values */
2689         /* note: quant matrix value (8) is implied here */
2690         s->last_dc[i] = 128 << s->intra_dc_precision;
2691
2692         s->current_picture.error[i] = 0;
2693     }
2694     if(s->codec_id==AV_CODEC_ID_AMV){
2695         s->last_dc[0] = 128*8/13;
2696         s->last_dc[1] = 128*8/14;
2697         s->last_dc[2] = 128*8/14;
2698     }
2699     s->mb_skip_run = 0;
2700     memset(s->last_mv, 0, sizeof(s->last_mv));
2701
2702     s->last_mv_dir = 0;
2703
2704     switch(s->codec_id){
2705     case AV_CODEC_ID_H263:
2706     case AV_CODEC_ID_H263P:
2707     case AV_CODEC_ID_FLV1:
2708         if (CONFIG_H263_ENCODER)
2709             s->gob_index = ff_h263_get_gob_height(s);
2710         break;
2711     case AV_CODEC_ID_MPEG4:
2712         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2713             ff_mpeg4_init_partitions(s);
2714         break;
2715     }
2716
2717     s->resync_mb_x=0;
2718     s->resync_mb_y=0;
2719     s->first_slice_line = 1;
2720     s->ptr_lastgob = s->pb.buf;
2721     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2722         s->mb_x=0;
2723         s->mb_y= mb_y;
2724
2725         ff_set_qscale(s, s->qscale);
2726         ff_init_block_index(s);
2727
2728         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2729             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2730             int mb_type= s->mb_type[xy];
2731 //            int d;
2732             int dmin= INT_MAX;
2733             int dir;
2734
2735             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2736                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2737                 return -1;
2738             }
2739             if(s->data_partitioning){
2740                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2741                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2742                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2743                     return -1;
2744                 }
2745             }
2746
2747             s->mb_x = mb_x;
2748             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2749             ff_update_block_index(s);
2750
2751             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2752                 ff_h261_reorder_mb_index(s);
2753                 xy= s->mb_y*s->mb_stride + s->mb_x;
2754                 mb_type= s->mb_type[xy];
2755             }
2756
2757             /* write gob / video packet header  */
2758             if(s->rtp_mode){
2759                 int current_packet_size, is_gob_start;
2760
2761                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2762
2763                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2764
2765                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2766
2767                 switch(s->codec_id){
2768                 case AV_CODEC_ID_H263:
2769                 case AV_CODEC_ID_H263P:
2770                     if(!s->h263_slice_structured)
2771                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2772                     break;
2773                 case AV_CODEC_ID_MPEG2VIDEO:
2774                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2775                 case AV_CODEC_ID_MPEG1VIDEO:
2776                     if(s->mb_skip_run) is_gob_start=0;
2777                     break;
2778                 case AV_CODEC_ID_MJPEG:
2779                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2780                     break;
2781                 }
2782
2783                 if(is_gob_start){
2784                     if(s->start_mb_y != mb_y || mb_x!=0){
2785                         write_slice_end(s);
2786
2787                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2788                             ff_mpeg4_init_partitions(s);
2789                         }
2790                     }
2791
2792                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2793                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2794
2795                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2796                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2797                         int d = 100 / s->error_rate;
2798                         if(r % d == 0){
2799                             current_packet_size=0;
2800                             s->pb.buf_ptr= s->ptr_lastgob;
2801                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2802                         }
2803                     }
2804
2805                     if (s->avctx->rtp_callback){
2806                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2807                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2808                     }
2809                     update_mb_info(s, 1);
2810
2811                     switch(s->codec_id){
2812                     case AV_CODEC_ID_MPEG4:
2813                         if (CONFIG_MPEG4_ENCODER) {
2814                             ff_mpeg4_encode_video_packet_header(s);
2815                             ff_mpeg4_clean_buffers(s);
2816                         }
2817                     break;
2818                     case AV_CODEC_ID_MPEG1VIDEO:
2819                     case AV_CODEC_ID_MPEG2VIDEO:
2820                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2821                             ff_mpeg1_encode_slice_header(s);
2822                             ff_mpeg1_clean_buffers(s);
2823                         }
2824                     break;
2825                     case AV_CODEC_ID_H263:
2826                     case AV_CODEC_ID_H263P:
2827                         if (CONFIG_H263_ENCODER)
2828                             ff_h263_encode_gob_header(s, mb_y);
2829                     break;
2830                     }
2831
2832                     if(s->flags&CODEC_FLAG_PASS1){
2833                         int bits= put_bits_count(&s->pb);
2834                         s->misc_bits+= bits - s->last_bits;
2835                         s->last_bits= bits;
2836                     }
2837
2838                     s->ptr_lastgob += current_packet_size;
2839                     s->first_slice_line=1;
2840                     s->resync_mb_x=mb_x;
2841                     s->resync_mb_y=mb_y;
2842                 }
2843             }
2844
2845             if(  (s->resync_mb_x   == s->mb_x)
2846                && s->resync_mb_y+1 == s->mb_y){
2847                 s->first_slice_line=0;
2848             }
2849
2850             s->mb_skipped=0;
2851             s->dquant=0; //only for QP_RD
2852
2853             update_mb_info(s, 0);
2854
2855             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2856                 int next_block=0;
2857                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2858
2859                 copy_context_before_encode(&backup_s, s, -1);
2860                 backup_s.pb= s->pb;
2861                 best_s.data_partitioning= s->data_partitioning;
2862                 best_s.partitioned_frame= s->partitioned_frame;
2863                 if(s->data_partitioning){
2864                     backup_s.pb2= s->pb2;
2865                     backup_s.tex_pb= s->tex_pb;
2866                 }
2867
2868                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2869                     s->mv_dir = MV_DIR_FORWARD;
2870                     s->mv_type = MV_TYPE_16X16;
2871                     s->mb_intra= 0;
2872                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2873                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2874                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2875                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2876                 }
2877                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2878                     s->mv_dir = MV_DIR_FORWARD;
2879                     s->mv_type = MV_TYPE_FIELD;
2880                     s->mb_intra= 0;
2881                     for(i=0; i<2; i++){
2882                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2883                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2884                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2885                     }
2886                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2887                                  &dmin, &next_block, 0, 0);
2888                 }
2889                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2890                     s->mv_dir = MV_DIR_FORWARD;
2891                     s->mv_type = MV_TYPE_16X16;
2892                     s->mb_intra= 0;
2893                     s->mv[0][0][0] = 0;
2894                     s->mv[0][0][1] = 0;
2895                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2896                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2897                 }
2898                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2899                     s->mv_dir = MV_DIR_FORWARD;
2900                     s->mv_type = MV_TYPE_8X8;
2901                     s->mb_intra= 0;
2902                     for(i=0; i<4; i++){
2903                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2904                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2905                     }
2906                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2907                                  &dmin, &next_block, 0, 0);
2908                 }
2909                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2910                     s->mv_dir = MV_DIR_FORWARD;
2911                     s->mv_type = MV_TYPE_16X16;
2912                     s->mb_intra= 0;
2913                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2914                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2915                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2916                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2917                 }
2918                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2919                     s->mv_dir = MV_DIR_BACKWARD;
2920                     s->mv_type = MV_TYPE_16X16;
2921                     s->mb_intra= 0;
2922                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2923                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2924                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2925                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2926                 }
2927                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2928                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2929                     s->mv_type = MV_TYPE_16X16;
2930                     s->mb_intra= 0;
2931                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2932                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2933                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2934                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2935                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2936                                  &dmin, &next_block, 0, 0);
2937                 }
2938                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2939                     s->mv_dir = MV_DIR_FORWARD;
2940                     s->mv_type = MV_TYPE_FIELD;
2941                     s->mb_intra= 0;
2942                     for(i=0; i<2; i++){
2943                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2944                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2945                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2946                     }
2947                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2948                                  &dmin, &next_block, 0, 0);
2949                 }
2950                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2951                     s->mv_dir = MV_DIR_BACKWARD;
2952                     s->mv_type = MV_TYPE_FIELD;
2953                     s->mb_intra= 0;
2954                     for(i=0; i<2; i++){
2955                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2956                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2957                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2958                     }
2959                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2960                                  &dmin, &next_block, 0, 0);
2961                 }
2962                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2963                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2964                     s->mv_type = MV_TYPE_FIELD;
2965                     s->mb_intra= 0;
2966                     for(dir=0; dir<2; dir++){
2967                         for(i=0; i<2; i++){
2968                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2969                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2970                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2971                         }
2972                     }
2973                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2974                                  &dmin, &next_block, 0, 0);
2975                 }
2976                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2977                     s->mv_dir = 0;
2978                     s->mv_type = MV_TYPE_16X16;
2979                     s->mb_intra= 1;
2980                     s->mv[0][0][0] = 0;
2981                     s->mv[0][0][1] = 0;
2982                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2983                                  &dmin, &next_block, 0, 0);
2984                     if(s->h263_pred || s->h263_aic){
2985                         if(best_s.mb_intra)
2986                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2987                         else
2988                             ff_clean_intra_table_entries(s); //old mode?
2989                     }
2990                 }
2991
2992                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2993                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2994                         const int last_qp= backup_s.qscale;
2995                         int qpi, qp, dc[6];
2996                         int16_t ac[6][16];
2997                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2998                         static const int dquant_tab[4]={-1,1,-2,2};
2999                         int storecoefs = s->mb_intra && s->dc_val[0];
3000
3001                         av_assert2(backup_s.dquant == 0);
3002
3003                         //FIXME intra
3004                         s->mv_dir= best_s.mv_dir;
3005                         s->mv_type = MV_TYPE_16X16;
3006                         s->mb_intra= best_s.mb_intra;
3007                         s->mv[0][0][0] = best_s.mv[0][0][0];
3008                         s->mv[0][0][1] = best_s.mv[0][0][1];
3009                         s->mv[1][0][0] = best_s.mv[1][0][0];
3010                         s->mv[1][0][1] = best_s.mv[1][0][1];
3011
3012                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3013                         for(; qpi<4; qpi++){
3014                             int dquant= dquant_tab[qpi];
3015                             qp= last_qp + dquant;
3016                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3017                                 continue;
3018                             backup_s.dquant= dquant;
3019                             if(storecoefs){
3020                                 for(i=0; i<6; i++){
3021                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3022                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3023                                 }
3024                             }
3025
3026                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3027                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3028                             if(best_s.qscale != qp){
3029                                 if(storecoefs){
3030                                     for(i=0; i<6; i++){
3031                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3032                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3033                                     }
3034                                 }
3035                             }
3036                         }
3037                     }
3038                 }
3039                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3040                     int mx= s->b_direct_mv_table[xy][0];
3041                     int my= s->b_direct_mv_table[xy][1];
3042
3043                     backup_s.dquant = 0;
3044                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3045                     s->mb_intra= 0;
3046                     ff_mpeg4_set_direct_mv(s, mx, my);
3047                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3048                                  &dmin, &next_block, mx, my);
3049                 }
3050                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3051                     backup_s.dquant = 0;
3052                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3053                     s->mb_intra= 0;
3054                     ff_mpeg4_set_direct_mv(s, 0, 0);
3055                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3056                                  &dmin, &next_block, 0, 0);
3057                 }
3058                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3059                     int coded=0;
3060                     for(i=0; i<6; i++)
3061                         coded |= s->block_last_index[i];
3062                     if(coded){
3063                         int mx,my;
3064                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3065                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3066                             mx=my=0; //FIXME find the one we actually used
3067                             ff_mpeg4_set_direct_mv(s, mx, my);
3068                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3069                             mx= s->mv[1][0][0];
3070                             my= s->mv[1][0][1];
3071                         }else{
3072                             mx= s->mv[0][0][0];
3073                             my= s->mv[0][0][1];
3074                         }
3075
3076                         s->mv_dir= best_s.mv_dir;
3077                         s->mv_type = best_s.mv_type;
3078                         s->mb_intra= 0;
3079 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3080                         s->mv[0][0][1] = best_s.mv[0][0][1];
3081                         s->mv[1][0][0] = best_s.mv[1][0][0];
3082                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3083                         backup_s.dquant= 0;
3084                         s->skipdct=1;
3085                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3086                                         &dmin, &next_block, mx, my);
3087                         s->skipdct=0;
3088                     }
3089                 }
3090
3091                 s->current_picture.qscale_table[xy] = best_s.qscale;
3092
3093                 copy_context_after_encode(s, &best_s, -1);
3094
3095                 pb_bits_count= put_bits_count(&s->pb);
3096                 flush_put_bits(&s->pb);
3097                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3098                 s->pb= backup_s.pb;
3099
3100                 if(s->data_partitioning){
3101                     pb2_bits_count= put_bits_count(&s->pb2);
3102                     flush_put_bits(&s->pb2);
3103                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3104                     s->pb2= backup_s.pb2;
3105
3106                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3107                     flush_put_bits(&s->tex_pb);
3108                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3109                     s->tex_pb= backup_s.tex_pb;
3110                 }
3111                 s->last_bits= put_bits_count(&s->pb);
3112
3113                 if (CONFIG_H263_ENCODER &&
3114                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3115                     ff_h263_update_motion_val(s);
3116
3117                 if(next_block==0){ //FIXME 16 vs linesize16
3118                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3119                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3120                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3121                 }
3122
3123                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3124                     ff_MPV_decode_mb(s, s->block);
3125             } else {
3126                 int motion_x = 0, motion_y = 0;
3127                 s->mv_type=MV_TYPE_16X16;
3128                 // only one MB-Type possible
3129
3130                 switch(mb_type){
3131                 case CANDIDATE_MB_TYPE_INTRA:
3132                     s->mv_dir = 0;
3133                     s->mb_intra= 1;
3134                     motion_x= s->mv[0][0][0] = 0;
3135                     motion_y= s->mv[0][0][1] = 0;
3136                     break;
3137                 case CANDIDATE_MB_TYPE_INTER:
3138                     s->mv_dir = MV_DIR_FORWARD;
3139                     s->mb_intra= 0;
3140                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3141                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3142                     break;
3143                 case CANDIDATE_MB_TYPE_INTER_I:
3144                     s->mv_dir = MV_DIR_FORWARD;
3145                     s->mv_type = MV_TYPE_FIELD;
3146                     s->mb_intra= 0;
3147                     for(i=0; i<2; i++){
3148                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3149                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3150                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3151                     }
3152                     break;
3153                 case CANDIDATE_MB_TYPE_INTER4V:
3154                     s->mv_dir = MV_DIR_FORWARD;
3155                     s->mv_type = MV_TYPE_8X8;
3156                     s->mb_intra= 0;
3157                     for(i=0; i<4; i++){
3158                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3159                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3160                     }
3161                     break;
3162                 case CANDIDATE_MB_TYPE_DIRECT:
3163                     if (CONFIG_MPEG4_ENCODER) {
3164                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3165                         s->mb_intra= 0;
3166                         motion_x=s->b_direct_mv_table[xy][0];
3167                         motion_y=s->b_direct_mv_table[xy][1];
3168                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3169                     }
3170                     break;
3171                 case CANDIDATE_MB_TYPE_DIRECT0:
3172                     if (CONFIG_MPEG4_ENCODER) {
3173                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3174                         s->mb_intra= 0;
3175                         ff_mpeg4_set_direct_mv(s, 0, 0);
3176                     }
3177                     break;
3178                 case CANDIDATE_MB_TYPE_BIDIR:
3179                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3180                     s->mb_intra= 0;
3181                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3182                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3183                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3184                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3185                     break;
3186                 case CANDIDATE_MB_TYPE_BACKWARD:
3187                     s->mv_dir = MV_DIR_BACKWARD;
3188                     s->mb_intra= 0;
3189                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3190                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3191                     break;
3192                 case CANDIDATE_MB_TYPE_FORWARD:
3193                     s->mv_dir = MV_DIR_FORWARD;
3194                     s->mb_intra= 0;
3195                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3196                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3197                     break;
3198                 case CANDIDATE_MB_TYPE_FORWARD_I:
3199                     s->mv_dir = MV_DIR_FORWARD;
3200                     s->mv_type = MV_TYPE_FIELD;
3201                     s->mb_intra= 0;
3202                     for(i=0; i<2; i++){
3203                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3204                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3205                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3206                     }
3207                     break;
3208                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3209                     s->mv_dir = MV_DIR_BACKWARD;
3210                     s->mv_type = MV_TYPE_FIELD;
3211                     s->mb_intra= 0;
3212                     for(i=0; i<2; i++){
3213                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3214                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3215                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3216                     }
3217                     break;
3218                 case CANDIDATE_MB_TYPE_BIDIR_I:
3219                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3220                     s->mv_type = MV_TYPE_FIELD;
3221                     s->mb_intra= 0;
3222                     for(dir=0; dir<2; dir++){
3223                         for(i=0; i<2; i++){
3224                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3225                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3226                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3227                         }
3228                     }
3229                     break;
3230                 default:
3231                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3232                 }
3233
3234                 encode_mb(s, motion_x, motion_y);
3235
3236                 // RAL: Update last macroblock type
3237                 s->last_mv_dir = s->mv_dir;
3238
3239                 if (CONFIG_H263_ENCODER &&
3240                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3241                     ff_h263_update_motion_val(s);
3242
3243                 ff_MPV_decode_mb(s, s->block);
3244             }
3245
3246             /* clean the MV table in IPS frames for direct mode in B frames */
3247             if(s->mb_intra /* && I,P,S_TYPE */){
3248                 s->p_mv_table[xy][0]=0;
3249                 s->p_mv_table[xy][1]=0;
3250             }
3251
3252             if(s->flags&CODEC_FLAG_PSNR){
3253                 int w= 16;
3254                 int h= 16;
3255
3256                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3257                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3258
3259                 s->current_picture.error[0] += sse(
3260                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3261                     s->dest[0], w, h, s->linesize);
3262                 s->current_picture.error[1] += sse(
3263                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3264                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3265                 s->current_picture.error[2] += sse(
3266                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3267                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3268             }
3269             if(s->loop_filter){
3270                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3271                     ff_h263_loop_filter(s);
3272             }
3273             av_dlog(s->avctx, "MB %d %d bits\n",
3274                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3275         }
3276     }
3277
3278     //not beautiful here but we must write it before flushing so it has to be here
3279     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3280         ff_msmpeg4_encode_ext_header(s);
3281
3282     write_slice_end(s);
3283
3284     /* Send the last GOB if RTP */
3285     if (s->avctx->rtp_callback) {
3286         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3287         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3288         /* Call the RTP callback to send the last GOB */
3289         emms_c();
3290         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3291     }
3292
3293     return 0;
3294 }
3295
3296 #define MERGE(field) dst->field += src->field; src->field=0
3297 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3298     MERGE(me.scene_change_score);
3299     MERGE(me.mc_mb_var_sum_temp);
3300     MERGE(me.mb_var_sum_temp);
3301 }
3302
3303 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3304     int i;
3305
3306     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3307     MERGE(dct_count[1]);
3308     MERGE(mv_bits);
3309     MERGE(i_tex_bits);
3310     MERGE(p_tex_bits);
3311     MERGE(i_count);
3312     MERGE(f_count);
3313     MERGE(b_count);
3314     MERGE(skip_count);
3315     MERGE(misc_bits);
3316     MERGE(er.error_count);
3317     MERGE(padding_bug_score);
3318     MERGE(current_picture.error[0]);
3319     MERGE(current_picture.error[1]);
3320     MERGE(current_picture.error[2]);
3321
3322     if(dst->avctx->noise_reduction){
3323         for(i=0; i<64; i++){
3324             MERGE(dct_error_sum[0][i]);
3325             MERGE(dct_error_sum[1][i]);
3326         }
3327     }
3328
3329     assert(put_bits_count(&src->pb) % 8 ==0);
3330     assert(put_bits_count(&dst->pb) % 8 ==0);
3331     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3332     flush_put_bits(&dst->pb);
3333 }
3334
3335 static int estimate_qp(MpegEncContext *s, int dry_run){
3336     if (s->next_lambda){
3337         s->current_picture_ptr->f->quality =
3338         s->current_picture.f->quality = s->next_lambda;
3339         if(!dry_run) s->next_lambda= 0;
3340     } else if (!s->fixed_qscale) {
3341         s->current_picture_ptr->f->quality =
3342         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3343         if (s->current_picture.f->quality < 0)
3344             return -1;
3345     }
3346
3347     if(s->adaptive_quant){
3348         switch(s->codec_id){
3349         case AV_CODEC_ID_MPEG4:
3350             if (CONFIG_MPEG4_ENCODER)
3351                 ff_clean_mpeg4_qscales(s);
3352             break;
3353         case AV_CODEC_ID_H263:
3354         case AV_CODEC_ID_H263P:
3355         case AV_CODEC_ID_FLV1:
3356             if (CONFIG_H263_ENCODER)
3357                 ff_clean_h263_qscales(s);
3358             break;
3359         default:
3360             ff_init_qscale_tab(s);
3361         }
3362
3363         s->lambda= s->lambda_table[0];
3364         //FIXME broken
3365     }else
3366         s->lambda = s->current_picture.f->quality;
3367     update_qscale(s);
3368     return 0;
3369 }
3370
3371 /* must be called before writing the header */
3372 static void set_frame_distances(MpegEncContext * s){
3373     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3374     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3375
3376     if(s->pict_type==AV_PICTURE_TYPE_B){
3377         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3378         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3379     }else{
3380         s->pp_time= s->time - s->last_non_b_time;
3381         s->last_non_b_time= s->time;
3382         assert(s->picture_number==0 || s->pp_time > 0);
3383     }
3384 }
3385
3386 static int encode_picture(MpegEncContext *s, int picture_number)
3387 {
3388     int i, ret;
3389     int bits;
3390     int context_count = s->slice_context_count;
3391
3392     s->picture_number = picture_number;
3393
3394     /* Reset the average MB variance */
3395     s->me.mb_var_sum_temp    =
3396     s->me.mc_mb_var_sum_temp = 0;
3397
3398     /* we need to initialize some time vars before we can encode b-frames */
3399     // RAL: Condition added for MPEG1VIDEO
3400     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3401         set_frame_distances(s);
3402     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3403         ff_set_mpeg4_time(s);
3404
3405     s->me.scene_change_score=0;
3406
3407 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3408
3409     if(s->pict_type==AV_PICTURE_TYPE_I){
3410         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3411         else                        s->no_rounding=0;
3412     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3413         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3414             s->no_rounding ^= 1;
3415     }
3416
3417     if(s->flags & CODEC_FLAG_PASS2){
3418         if (estimate_qp(s,1) < 0)
3419             return -1;
3420         ff_get_2pass_fcode(s);
3421     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3422         if(s->pict_type==AV_PICTURE_TYPE_B)
3423             s->lambda= s->last_lambda_for[s->pict_type];
3424         else
3425             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3426         update_qscale(s);
3427     }
3428
3429     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3430         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3431         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3432         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3433         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3434     }
3435
3436     s->mb_intra=0; //for the rate distortion & bit compare functions
3437     for(i=1; i<context_count; i++){
3438         ret = ff_update_duplicate_context(s->thread_context[i], s);
3439         if (ret < 0)
3440             return ret;
3441     }
3442
3443     if(ff_init_me(s)<0)
3444         return -1;
3445
3446     /* Estimate motion for every MB */
3447     if(s->pict_type != AV_PICTURE_TYPE_I){
3448         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3449         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3450         if (s->pict_type != AV_PICTURE_TYPE_B) {
3451             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3452                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3453             }
3454         }
3455
3456         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3457     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3458         /* I-Frame */
3459         for(i=0; i<s->mb_stride*s->mb_height; i++)
3460             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3461
3462         if(!s->fixed_qscale){
3463             /* finding spatial complexity for I-frame rate control */
3464             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3465         }
3466     }
3467     for(i=1; i<context_count; i++){
3468         merge_context_after_me(s, s->thread_context[i]);
3469     }
3470     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3471     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3472     emms_c();
3473
3474     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3475         s->pict_type= AV_PICTURE_TYPE_I;
3476         for(i=0; i<s->mb_stride*s->mb_height; i++)
3477             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3478         if(s->msmpeg4_version >= 3)
3479             s->no_rounding=1;
3480         av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3481                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3482     }
3483
3484     if(!s->umvplus){
3485         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3486             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3487
3488             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3489                 int a,b;
3490                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3491                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3492                 s->f_code= FFMAX3(s->f_code, a, b);
3493             }
3494
3495             ff_fix_long_p_mvs(s);
3496             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3497             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3498                 int j;
3499                 for(i=0; i<2; i++){
3500                     for(j=0; j<2; j++)
3501                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3502                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3503                 }
3504             }
3505         }
3506
3507         if(s->pict_type==AV_PICTURE_TYPE_B){
3508             int a, b;
3509
3510             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3511             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3512             s->f_code = FFMAX(a, b);
3513
3514             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3515             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3516             s->b_code = FFMAX(a, b);
3517
3518             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3519             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3520             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3521             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3522             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3523                 int dir, j;
3524                 for(dir=0; dir<2; dir++){
3525                     for(i=0; i<2; i++){
3526                         for(j=0; j<2; j++){
3527                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3528                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3529                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3530                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3531                         }
3532                     }
3533                 }
3534             }
3535         }
3536     }
3537
3538     if (estimate_qp(s, 0) < 0)
3539         return -1;
3540
3541     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3542         s->qscale= 3; //reduce clipping problems
3543
3544     if (s->out_format == FMT_MJPEG) {
3545         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3546         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3547
3548         if (s->avctx->intra_matrix) {
3549             chroma_matrix =
3550             luma_matrix = s->avctx->intra_matrix;
3551         }
3552         if (s->avctx->chroma_intra_matrix)
3553             chroma_matrix = s->avctx->chroma_intra_matrix;
3554
3555         /* for mjpeg, we do include qscale in the matrix */
3556         for(i=1;i<64;i++){
3557             int j = s->idsp.idct_permutation[i];
3558
3559             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3560             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3561         }
3562         s->y_dc_scale_table=
3563         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3564         s->chroma_intra_matrix[0] =
3565         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3566         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3567                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3568         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3569                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3570         s->qscale= 8;
3571     }
3572     if(s->codec_id == AV_CODEC_ID_AMV){
3573         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3574         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3575         for(i=1;i<64;i++){
3576             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3577
3578             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3579             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3580         }
3581         s->y_dc_scale_table= y;
3582         s->c_dc_scale_table= c;
3583         s->intra_matrix[0] = 13;
3584         s->chroma_intra_matrix[0] = 14;
3585         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3586                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3587         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3588                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3589         s->qscale= 8;
3590     }
3591
3592     //FIXME var duplication
3593     s->current_picture_ptr->f->key_frame =
3594     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3595     s->current_picture_ptr->f->pict_type =
3596     s->current_picture.f->pict_type = s->pict_type;
3597
3598     if (s->current_picture.f->key_frame)
3599         s->picture_in_gop_number=0;
3600
3601     s->mb_x = s->mb_y = 0;
3602     s->last_bits= put_bits_count(&s->pb);
3603     switch(s->out_format) {
3604     case FMT_MJPEG:
3605         if (CONFIG_MJPEG_ENCODER)
3606             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3607                                            s->intra_matrix, s->chroma_intra_matrix);
3608         break;
3609     case FMT_H261:
3610         if (CONFIG_H261_ENCODER)
3611             ff_h261_encode_picture_header(s, picture_number);
3612         break;
3613     case FMT_H263:
3614         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3615             ff_wmv2_encode_picture_header(s, picture_number);
3616         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3617             ff_msmpeg4_encode_picture_header(s, picture_number);
3618         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3619             ff_mpeg4_encode_picture_header(s, picture_number);
3620         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3621             ff_rv10_encode_picture_header(s, picture_number);
3622         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3623             ff_rv20_encode_picture_header(s, picture_number);
3624         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3625             ff_flv_encode_picture_header(s, picture_number);
3626         else if (CONFIG_H263_ENCODER)
3627             ff_h263_encode_picture_header(s, picture_number);
3628         break;
3629     case FMT_MPEG1:
3630         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3631             ff_mpeg1_encode_picture_header(s, picture_number);
3632         break;
3633     default:
3634         av_assert0(0);
3635     }
3636     bits= put_bits_count(&s->pb);
3637     s->header_bits= bits - s->last_bits;
3638
3639     for(i=1; i<context_count; i++){
3640         update_duplicate_context_after_me(s->thread_context[i], s);
3641     }
3642     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3643     for(i=1; i<context_count; i++){
3644         merge_context_after_encode(s, s->thread_context[i]);
3645     }
3646     emms_c();
3647     return 0;
3648 }
3649
3650 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3651     const int intra= s->mb_intra;
3652     int i;
3653
3654     s->dct_count[intra]++;
3655
3656     for(i=0; i<64; i++){
3657         int level= block[i];
3658
3659         if(level){
3660             if(level>0){
3661                 s->dct_error_sum[intra][i] += level;
3662                 level -= s->dct_offset[intra][i];
3663                 if(level<0) level=0;
3664             }else{
3665                 s->dct_error_sum[intra][i] -= level;
3666                 level += s->dct_offset[intra][i];
3667                 if(level>0) level=0;
3668             }
3669             block[i]= level;
3670         }
3671     }
3672 }
3673
/**
 * Quantize one 8x8 block using a rate-distortion ("trellis") search.
 *
 * The block is forward-transformed in place (s->dsp.fdct), optionally
 * denoised (when s->dct_error_sum is set), and then, for every coefficient in
 * scan order, up to two candidate quantized levels are evaluated. For each
 * scan position the function keeps the best score of any coding ending there
 * (score_tab) together with a pruned list of candidate predecessor positions
 * (survivor), scoring each candidate as distortion + lambda * VLC length.
 * The chosen levels are finally written back into block at the positions
 * given by s->intra_scantable.permutated.
 *
 * @param s        encoder context (quantization matrices, VLC length tables,
 *                 lambda2, output format, ...)
 * @param block    in: samples to transform; out: quantized coefficients
 * @param n        block number; n < 4 selects y_dc_scale / q_intra_matrix,
 *                 other values select c_dc_scale / q_chroma_intra_matrix
 * @param qscale   quantizer scale
 * @param overflow set to nonzero when s->max_qcoeff is smaller than the OR of
 *                 all candidate level magnitudes (overflow *might* have
 *                 happened)
 * @return scan index of the last nonzero coefficient; start_i - 1 (0 for
 *         intra, -1 for inter) when no AC coefficient is coded, and -1 when
 *         the single-coefficient inter special case decides to code nothing
 */
3674 static int dct_quantize_trellis_c(MpegEncContext *s,
3675                                   int16_t *block, int n,
3676                                   int qscale, int *overflow){
3677     const int *qmat;
3678     const uint8_t *scantable= s->intra_scantable.scantable;
3679     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3680     int max=0;
3681     unsigned int threshold1, threshold2;
3682     int bias=0;
         /* run_tab[i+1] / level_tab[i+1]: run and level of the best coding whose
          * most recent nonzero coefficient sits at scan position i.
          * score_tab[i]: best score found for coding everything before position i.
          * survivor[]: positions still worth considering as predecessors. */
3683     int run_tab[65];
3684     int level_tab[65];
3685     int score_tab[65];
3686     int survivor[65];
3687     int survivor_count;
         /* Best terminating ("last") choice found so far. last_score starts at 0,
          * so for H.263/H.261 a terminating candidate is only taken if its score
          * is negative. */
3688     int last_run=0;
3689     int last_level=0;
3690     int last_score= 0;
3691     int last_i;
         /* coeff[k][i]: k-th candidate level at scan position i;
          * coeff_count[i]: number of valid candidates (1 or 2). */
3692     int coeff[2][64];
3693     int coeff_count[64];
3694     int qmul, qadd, start_i, last_non_zero, i, dc;
3695     const int esc_length= s->ac_esc_length;
3696     uint8_t * length;
3697     uint8_t * last_length;
3698     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3699
3700     s->dsp.fdct (block);
3701
3702     if(s->dct_error_sum)
3703         s->denoise_dct(s, block);
         /* Reconstruction constants used below for H.263/H.261 output:
          * unquant_coeff = alevel*qmul + qadd. */
3704     qmul= qscale*16;
3705     qadd= ((qscale-1)|1)*8;
3706
3707     if (s->mb_intra) {
3708         int q;
3709         if (!s->h263_aic) {
3710             if (n < 4)
3711                 q = s->y_dc_scale;
3712             else
3713                 q = s->c_dc_scale;
3714             q = q << 3;
3715         } else{
3716             /* For AIC we skip quant/dequant of INTRADC */
3717             q = 1 << 3;
3718             qadd=0;
3719         }
3720
3721         /* note: block[0] is assumed to be positive */
             /* The DC coefficient is quantized directly (rounded division); the
              * search below only covers positions >= start_i. */
3722         block[0] = (block[0] + (q >> 1)) / q;
3723         start_i = 1;
3724         last_non_zero = 0;
3725         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3726         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3727             bias= 1<<(QMAT_SHIFT-1);
3728         length     = s->intra_ac_vlc_length;
3729         last_length= s->intra_ac_vlc_last_length;
3730     } else {
3731         start_i = 0;
3732         last_non_zero = -1;
3733         qmat = s->q_inter_matrix[qscale];
3734         length     = s->inter_ac_vlc_length;
3735         last_length= s->inter_ac_vlc_last_length;
3736     }
3737     last_i= start_i;
3738
         /* (unsigned)(level+threshold1) > threshold2 is a single-compare test for
          * level > threshold1 || level < -threshold1. */
3739     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3740     threshold2= (threshold1<<1);
3741
         /* Scan backwards for the last position whose scaled coefficient exceeds
          * the threshold. */
3742     for(i=63; i>=start_i; i--) {
3743         const int j = scantable[i];
3744         int level = block[j] * qmat[j];
3745
3746         if(((unsigned)(level+threshold1))>threshold2){
3747             last_non_zero = i;
3748             break;
3749         }
3750     }
3751
         /* Build the candidate levels for every position up to last_non_zero. */
3752     for(i=start_i; i<=last_non_zero; i++) {
3753         const int j = scantable[i];
3754         int level = block[j] * qmat[j];
3755
3756 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3757 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3758         if(((unsigned)(level+threshold1))>threshold2){
                 /* Candidates: the regularly quantized level and the level one
                  * step closer to zero. From here on `level` holds the
                  * magnitude. */
3759             if(level>0){
3760                 level= (bias + level)>>QMAT_SHIFT;
3761                 coeff[0][i]= level;
3762                 coeff[1][i]= level-1;
3763 //                coeff[2][k]= level-2;
3764             }else{
3765                 level= (bias - level)>>QMAT_SHIFT;
3766                 coeff[0][i]= -level;
3767                 coeff[1][i]= -level+1;
3768 //                coeff[2][k]= -level+2;
3769             }
                 /* A magnitude of 1 gets a single candidate, so that a zero level
                  * is never listed explicitly. */
3770             coeff_count[i]= FFMIN(level, 2);
3771             av_assert2(coeff_count[i]);
3772             max |=level;
3773         }else{
                 /* Below threshold: the only candidate is +1 or -1, taking the
                  * sign of the coefficient (level>>31 is -1 for negative values
                  * with an arithmetic shift). */
3774             coeff[0][i]= (level>>31)|1;
3775             coeff_count[i]= 1;
3776         }
3777     }
3778
3779     *overflow= s->max_qcoeff < max; //overflow might have happened
3780
         /* Nothing above threshold: clear the AC coefficients and stop. */
3781     if(last_non_zero < start_i){
3782         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3783         return last_non_zero;
3784     }
3785
3786     score_tab[start_i]= 0;
3787     survivor[0]= start_i;
3788     survivor_count= 1;
3789
         /* Main search: for each position, try every candidate level combined
          * with every surviving predecessor position. */
3790     for(i=start_i; i<=last_non_zero; i++){
3791         int level_index, j, zero_distortion;
3792         int dct_coeff= FFABS(block[ scantable[i] ]);
3793         int best_score=256*256*256*120;
3794
             /* With the ifast fdct the coefficient is rescaled through
              * ff_inv_aanscales before distortion is measured. */
3795         if (s->dsp.fdct == ff_fdct_ifast)
3796             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
             /* Distortion of quantizing this coefficient to zero; candidate
              * distortions below are measured relative to it. */
3797         zero_distortion= dct_coeff*dct_coeff;
3798
3799         for(level_index=0; level_index < coeff_count[i]; level_index++){
3800             int distortion;
3801             int level= coeff[level_index][i];
3802             const int alevel= FFABS(level);
3803             int unquant_coeff;
3804
3805             av_assert2(level);
3806
                 /* Reconstruct the value this candidate level would give. */
3807             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3808                 unquant_coeff= alevel*qmul + qadd;
3809             }else{ //MPEG1
3810                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3811                 if(s->mb_intra){
3812                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3813                         unquant_coeff =   (unquant_coeff - 1) | 1;
3814                 }else{
3815                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3816                         unquant_coeff =   (unquant_coeff - 1) | 1;
3817                 }
3818                 unquant_coeff<<= 3;
3819             }
3820
3821             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* Offset the level by 64 for the UNI_AC_ENC_INDEX lookups; the
                  * mask test is true when the offset level lies in 0..127.
                  * Otherwise the escape length esc_length is charged. */
3822             level+=64;
3823             if((level&(~127)) == 0){
3824                 for(j=survivor_count-1; j>=0; j--){
3825                     int run= i - survivor[j];
3826                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3827                     score += score_tab[i-run];
3828
3829                     if(score < best_score){
3830                         best_score= score;
3831                         run_tab[i+1]= run;
3832                         level_tab[i+1]= level-64;
3833                     }
3834                 }
3835
                     /* For H.263/H.261 the terminating coefficient is priced with
                      * the separate last_length table, so the best terminating
                      * choice is tracked while scanning. */
3836                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3837                     for(j=survivor_count-1; j>=0; j--){
3838                         int run= i - survivor[j];
3839                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3840                         score += score_tab[i-run];
3841                         if(score < last_score){
3842                             last_score= score;
3843                             last_run= run;
3844                             last_level= level-64;
3845                             last_i= i+1;
3846                         }
3847                     }
3848                 }
3849             }else{
3850                 distortion += esc_length*lambda;
3851                 for(j=survivor_count-1; j>=0; j--){
3852                     int run= i - survivor[j];
3853                     int score= distortion + score_tab[i-run];
3854
3855                     if(score < best_score){
3856                         best_score= score;
3857                         run_tab[i+1]= run;
3858                         level_tab[i+1]= level-64;
3859                     }
3860                 }
3861
3862                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3863                   for(j=survivor_count-1; j>=0; j--){
3864                         int run= i - survivor[j];
3865                         int score= distortion + score_tab[i-run];
3866                         if(score < last_score){
3867                             last_score= score;
3868                             last_run= run;
3869                             last_level= level-64;
3870                             last_i= i+1;
3871                         }
3872                     }
3873                 }
3874             }
3875         }
3876
3877         score_tab[i+1]= best_score;
3878
3879         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
             /* Drop trailing survivors whose score is worse than the new best
              * (with a slack of lambda when last_non_zero > 27), then add the
              * position after i as a new survivor. */
3880         if(last_non_zero <= 27){
3881             for(; survivor_count; survivor_count--){
3882                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3883                     break;
3884             }
3885         }else{
3886             for(; survivor_count; survivor_count--){
3887                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3888                     break;
3889             }
3890         }
3891
3892         survivor[ survivor_count++ ]= i+1;
3893     }
3894
         /* Other output formats: choose the end position after the scan, adding
          * lambda*2 to every candidate with i != 0. */
3895     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3896         last_score= 256*256*256*120;
3897         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3898             int score= score_tab[i];
3899             if(i) score += lambda*2; //FIXME exacter?
3900
3901             if(score < last_score){
3902                 last_score= score;
3903                 last_i= i;
3904                 last_level= level_tab[i];
3905                 last_run= run_tab[i];
3906             }
3907         }
3908     }
3909
3910     s->coded_score[n] = last_score;
3911
         /* Remember |block[0]| before the coefficients are cleared; it is needed
          * by the single-coefficient special case below. */
3912     dc= FFABS(block[0]);
3913     last_non_zero= last_i - 1;
3914     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3915
3916     if(last_non_zero < start_i)
3917         return last_non_zero;
3918
         /* Special case: inter block (start_i == 0) whose only coded coefficient
          * would be the one at position 0. Re-decide between its candidates and
          * zero, using dc*dc (the cost of coding nothing) as the score to beat. */
3919     if(last_non_zero == 0 && start_i == 0){
3920         int best_level= 0;
3921         int best_score= dc * dc;
3922
3923         for(i=0; i<coeff_count[0]; i++){
3924             int level= coeff[i][0];
3925             int alevel= FFABS(level);
3926             int unquant_coeff, score, distortion;
3927
3928             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3929                     unquant_coeff= (alevel*qmul + qadd)>>3;
3930             }else{ //MPEG1
3931                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3932                     unquant_coeff =   (unquant_coeff - 1) | 1;
3933             }
3934             unquant_coeff = (unquant_coeff + 4) >> 3;
3935             unquant_coeff<<= 3 + 3;
3936
3937             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3938             level+=64;
3939             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3940             else                    score= distortion + esc_length*lambda;
3941
3942             if(score < best_score){
3943                 best_score= score;
3944                 best_level= level - 64;
3945             }
3946         }
3947         block[0]= best_level;
3948         s->coded_score[n] = best_score - dc*dc;
3949         if(best_level == 0) return -1;
3950         else                return last_non_zero;
3951     }
3952
         /* Walk the chosen path backwards through run_tab/level_tab and write the
          * selected levels into the block (permuted positions). */
3953     i= last_i;
3954     av_assert2(last_level);
3955
3956     block[ perm_scantable[last_non_zero] ]= last_level;
3957     i -= last_run + 1;
3958
3959     for(; i>start_i; i -= run_tab[i] + 1){
3960         block[ perm_scantable[i-1] ]= level_tab[i];
3961     }
3962
3963     return last_non_zero;
3964 }
3965
3966 //#define REFINE_STATS 1
3967 static int16_t basis[64][64];
3968
3969 static void build_basis(uint8_t *perm){
3970     int i, j, x, y;
3971     emms_c();
3972     for(i=0; i<8; i++){
3973         for(j=0; j<8; j++){
3974             for(y=0; y<8; y++){
3975                 for(x=0; x<8; x++){
3976                     double s= 0.25*(1<<BASIS_SHIFT);
3977                     int index= 8*i + j;
3978                     int perm_index= perm[index];
3979                     if(i==0) s*= sqrt(0.5);
3980                     if(j==0) s*= sqrt(0.5);
3981                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3982                 }
3983             }
3984         }
3985     }
3986 }
3987
3988 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3989                         int16_t *block, int16_t *weight, int16_t *orig,
3990                         int n, int qscale){
3991     int16_t rem[64];
3992     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3993     const uint8_t *scantable= s->intra_scantable.scantable;
3994     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3995 //    unsigned int threshold1, threshold2;
3996 //    int bias=0;
3997     int run_tab[65];
3998     int prev_run=0;
3999     int prev_level=0;
4000     int qmul, qadd, start_i, last_non_zero, i, dc;
4001     uint8_t * length;
4002     uint8_t * last_length;
4003     int lambda;
4004     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4005 #ifdef REFINE_STATS
4006 static int count=0;
4007 static int after_last=0;
4008 static int to_zero=0;
4009 static int from_zero=0;
4010 static int raise=0;
4011 static int lower=0;
4012 static int messed_sign=0;
4013 #endif
4014
4015     if(basis[0][0] == 0)
4016         build_basis(s->idsp.idct_permutation);
4017
4018     qmul= qscale*2;
4019     qadd= (qscale-1)|1;
4020     if (s->mb_intra) {
4021         if (!s->h263_aic) {
4022             if (n < 4)
4023                 q = s->y_dc_scale;
4024             else
4025                 q = s->c_dc_scale;
4026         } else{
4027             /* For AIC we skip quant/dequant of INTRADC */
4028             q = 1;
4029             qadd=0;
4030         }
4031         q <<= RECON_SHIFT-3;
4032         /* note: block[0] is assumed to be positive */
4033         dc= block[0]*q;
4034 //        block[0] = (block[0] + (q >> 1)) / q;
4035         start_i = 1;
4036 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4037 //            bias= 1<<(QMAT_SHIFT-1);
4038         length     = s->intra_ac_vlc_length;
4039         last_length= s->intra_ac_vlc_last_length;
4040     } else {
4041         dc= 0;
4042         start_i = 0;
4043         length     = s->inter_ac_vlc_length;
4044         last_length= s->inter_ac_vlc_last_length;
4045     }
4046     last_non_zero = s->block_last_index[n];
4047
4048 #ifdef REFINE_STATS
4049 {START_TIMER
4050 #endif
4051     dc += (1<<(RECON_SHIFT-1));
4052     for(i=0; i<64; i++){
4053         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4054     }
4055 #ifdef REFINE_STATS
4056 STOP_TIMER("memset rem[]")}
4057 #endif
4058     sum=0;
4059     for(i=0; i<64; i++){
4060         int one= 36;
4061         int qns=4;
4062         int w;
4063
4064         w= FFABS(weight[i]) + qns*one;
4065         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4066
4067         weight[i] = w;
4068 //        w=weight[i] = (63*qns + (w/2)) / w;
4069
4070         av_assert2(w>0);
4071         av_assert2(w<(1<<6));
4072         sum += w*w;
4073     }
4074     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4075 #ifdef REFINE_STATS
4076 {START_TIMER
4077 #endif
4078     run=0;
4079     rle_index=0;
4080     for(i=start_i; i<=last_non_zero; i++){
4081         int j= perm_scantable[i];
4082         const int level= block[j];
4083         int coeff;
4084
4085         if(level){
4086             if(level<0) coeff= qmul*level - qadd;
4087             else        coeff= qmul*level + qadd;
4088             run_tab[rle_index++]=run;
4089             run=0;
4090
4091             s->dsp.add_8x8basis(rem, basis[j], coeff);
4092         }else{
4093             run++;
4094         }
4095     }
4096 #ifdef REFINE_STATS
4097 if(last_non_zero>0){
4098 STOP_TIMER("init rem[]")
4099 }
4100 }
4101
4102 {START_TIMER
4103 #endif
4104     for(;;){
4105         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
4106         int best_coeff=0;
4107         int best_change=0;
4108         int run2, best_unquant_change=0, analyze_gradient;
4109 #ifdef REFINE_STATS
4110 {START_TIMER
4111 #endif
4112         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4113
4114         if(analyze_gradient){
4115 #ifdef REFINE_STATS
4116 {START_TIMER
4117 #endif
4118             for(i=0; i<64; i++){
4119                 int w= weight[i];
4120
4121                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4122             }
4123 #ifdef REFINE_STATS
4124 STOP_TIMER("rem*w*w")}
4125 {START_TIMER
4126 #endif
4127             s->dsp.fdct(d1);
4128 #ifdef REFINE_STATS
4129 STOP_TIMER("dct")}
4130 #endif
4131         }
4132
4133         if(start_i){
4134             const int level= block[0];
4135             int change, old_coeff;
4136
4137             av_assert2(s->mb_intra);
4138
4139             old_coeff= q*level;
4140
4141             for(change=-1; change<=1; change+=2){
4142                 int new_level= level + change;
4143                 int score, new_coeff;
4144
4145                 new_coeff= q*new_level;
4146                 if(new_coeff >= 2048 || new_coeff < 0)
4147                     continue;
4148
4149                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
4150                 if(score<best_score){
4151                     best_score= score;
4152                     best_coeff= 0;
4153                     best_change= change;
4154                     best_unquant_change= new_coeff - old_coeff;
4155                 }
4156             }
4157         }
4158
4159         run=0;
4160         rle_index=0;
4161         run2= run_tab[rle_index++];
4162         prev_level=0;
4163         prev_run=0;
4164
4165         for(i=start_i; i<64; i++){
4166             int j= perm_scantable[i];
4167             const int level= block[j];
4168             int change, old_coeff;
4169
4170             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4171                 break;
4172
4173             if(level){
4174                 if(level<0) old_coeff= qmul*level - qadd;
4175                 else        old_coeff= qmul*level + qadd;
4176                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4177             }else{
4178                 old_coeff=0;
4179                 run2--;
4180                 av_assert2(run2>=0 || i >= last_non_zero );
4181             }
4182
4183             for(change=-1; change<=1; change+=2){
4184                 int new_level= level + change;
4185                 int score, new_coeff, unquant_change;
4186
4187                 score=0;
4188                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4189                    continue;
4190
4191                 if(new_level){
4192                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4193                     else            new_coeff= qmul*new_level + qadd;
4194                     if(new_coeff >= 2048 || new_coeff <= -2048)
4195                         continue;
4196                     //FIXME check for overflow
4197
4198                     if(level){
4199                         if(level < 63 && level > -63){
4200                             if(i < last_non_zero)
4201                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4202                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4203                             else
4204                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4205                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4206                         }
4207                     }else{
4208                         av_assert2(FFABS(new_level)==1);
4209
4210                         if(analyze_gradient){
4211                             int g= d1[ scantable[i] ];
4212                             if(g && (g^new_level) >= 0)
4213                                 continue;
4214                         }
4215
4216                         if(i < last_non_zero){
4217                             int next_i= i + run2 + 1;
4218                             int next_level= block[ perm_scantable[next_i] ] + 64;
4219
4220                             if(next_level&(~127))
4221                                 next_level= 0;
4222
4223                             if(next_i < last_non_zero)
4224                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4225                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4226                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4227                             else
4228                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4229                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4230                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4231                         }else{
4232                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4233                             if(prev_level){
4234                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4235                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4236                             }
4237                         }
4238                     }
4239                 }else{
4240                     new_coeff=0;
4241                     av_assert2(FFABS(level)==1);
4242
4243                     if(i < last_non_zero){
4244                         int next_i= i + run2 + 1;
4245                         int next_level= block[ perm_scantable[next_i] ] + 64;
4246
4247                         if(next_level&(~127))
4248                             next_level= 0;
4249
4250                         if(next_i < last_non_zero)
4251                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4252                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4253                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4254                         else
4255                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4256                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4257                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4258                     }else{
4259                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4260                         if(prev_level){
4261                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4262                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4263                         }
4264                     }
4265                 }
4266
4267                 score *= lambda;
4268
4269                 unquant_change= new_coeff - old_coeff;
4270                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4271
4272                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4273                 if(score<best_score){
4274                     best_score= score;
4275                     best_coeff= i;
4276                     best_change= change;
4277                     best_unquant_change= unquant_change;
4278                 }
4279             }
4280             if(level){
4281                 prev_level= level + 64;
4282                 if(prev_level&(~127))
4283                     prev_level= 0;
4284                 prev_run= run;
4285                 run=0;
4286             }else{
4287                 run++;
4288             }
4289         }
4290 #ifdef REFINE_STATS
4291 STOP_TIMER("iterative step")}
4292 #endif
4293
4294         if(best_change){
4295             int j= perm_scantable[ best_coeff ];
4296
4297             block[j] += best_change;
4298
4299             if(best_coeff > last_non_zero){
4300                 last_non_zero= best_coeff;
4301                 av_assert2(block[j]);
4302 #ifdef REFINE_STATS
4303 after_last++;
4304 #endif
4305             }else{
4306 #ifdef REFINE_STATS
4307 if(block[j]){
4308     if(block[j] - best_change){
4309         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4310             raise++;
4311         }else{
4312             lower++;
4313         }
4314     }else{
4315         from_zero++;
4316     }
4317 }else{
4318     to_zero++;
4319 }
4320 #endif
4321                 for(; last_non_zero>=start_i; last_non_zero--){
4322                     if(block[perm_scantable[last_non_zero]])
4323                         break;
4324                 }
4325             }
4326 #ifdef REFINE_STATS
4327 count++;
4328 if(256*256*256*64 % count == 0){
4329     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4330 }
4331 #endif
4332             run=0;
4333             rle_index=0;
4334             for(i=start_i; i<=last_non_zero; i++){
4335                 int j= perm_scantable[i];
4336                 const int level= block[j];
4337
4338                  if(level){
4339                      run_tab[rle_index++]=run;
4340                      run=0;
4341                  }else{
4342                      run++;
4343                  }
4344             }
4345
4346             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4347         }else{
4348             break;
4349         }
4350     }
4351 #ifdef REFINE_STATS
4352 if(last_non_zero>0){
4353 STOP_TIMER("iterative search")
4354 }
4355 }
4356 #endif
4357
4358     return last_non_zero;
4359 }
4360
4361 int ff_dct_quantize_c(MpegEncContext *s,
4362                         int16_t *block, int n,
4363                         int qscale, int *overflow)
4364 {
4365     int i, j, level, last_non_zero, q, start_i;
4366     const int *qmat;
4367     const uint8_t *scantable= s->intra_scantable.scantable;
4368     int bias;
4369     int max=0;
4370     unsigned int threshold1, threshold2;
4371
4372     s->dsp.fdct (block);
4373
4374     if(s->dct_error_sum)
4375         s->denoise_dct(s, block);
4376
4377     if (s->mb_intra) {
4378         if (!s->h263_aic) {
4379             if (n < 4)
4380                 q = s->y_dc_scale;
4381             else
4382                 q = s->c_dc_scale;
4383             q = q << 3;
4384         } else
4385             /* For AIC we skip quant/dequant of INTRADC */
4386             q = 1 << 3;
4387
4388         /* note: block[0] is assumed to be positive */
4389         block[0] = (block[0] + (q >> 1)) / q;
4390         start_i = 1;
4391         last_non_zero = 0;
4392         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4393         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4394     } else {
4395         start_i = 0;
4396         last_non_zero = -1;
4397         qmat = s->q_inter_matrix[qscale];
4398         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4399     }
4400     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4401     threshold2= (threshold1<<1);
4402     for(i=63;i>=start_i;i--) {
4403         j = scantable[i];
4404         level = block[j] * qmat[j];
4405
4406         if(((unsigned)(level+threshold1))>threshold2){
4407             last_non_zero = i;
4408             break;
4409         }else{
4410             block[j]=0;
4411         }
4412     }
4413     for(i=start_i; i<=last_non_zero; i++) {
4414         j = scantable[i];
4415         level = block[j] * qmat[j];
4416
4417 //        if(   bias+level >= (1<<QMAT_SHIFT)
4418 //           || bias-level >= (1<<QMAT_SHIFT)){
4419         if(((unsigned)(level+threshold1))>threshold2){
4420             if(level>0){
4421                 level= (bias + level)>>QMAT_SHIFT;
4422                 block[j]= level;
4423             }else{
4424                 level= (bias - level)>>QMAT_SHIFT;
4425                 block[j]= -level;
4426             }
4427             max |=level;
4428         }else{
4429             block[j]=0;
4430         }
4431     }
4432     *overflow= s->max_qcoeff < max; //overflow might have happened
4433
4434     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4435     if (s->idsp.idct_permutation_type != FF_NO_IDCT_PERM)
4436         ff_block_permute(block, s->idsp.idct_permutation,
4437                          scantable, last_non_zero);
4438
4439     return last_non_zero;
4440 }
4441
/* Byte offset of field x inside MpegEncContext, for the AVOption tables below. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flags shared by the option entries below. The expansion is parenthesized so
 * that it stays a single operand next to any neighbouring operator. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4444 static const AVOption h263_options[] = {
4445     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4446     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4447     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4448     FF_MPV_COMMON_OPTS
4449     { NULL },
4450 };
4451
4452 static const AVClass h263_class = {
4453     .class_name = "H.263 encoder",
4454     .item_name  = av_default_item_name,
4455     .option     = h263_options,
4456     .version    = LIBAVUTIL_VERSION_INT,
4457 };
4458
4459 AVCodec ff_h263_encoder = {
4460     .name           = "h263",
4461     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4462     .type           = AVMEDIA_TYPE_VIDEO,
4463     .id             = AV_CODEC_ID_H263,
4464     .priv_data_size = sizeof(MpegEncContext),
4465     .init           = ff_MPV_encode_init,
4466     .encode2        = ff_MPV_encode_picture,
4467     .close          = ff_MPV_encode_end,
4468     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4469     .priv_class     = &h263_class,
4470 };
4471
4472 static const AVOption h263p_options[] = {
4473     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4474     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4475     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4476     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4477     FF_MPV_COMMON_OPTS
4478     { NULL },
4479 };
4480 static const AVClass h263p_class = {
4481     .class_name = "H.263p encoder",
4482     .item_name  = av_default_item_name,
4483     .option     = h263p_options,
4484     .version    = LIBAVUTIL_VERSION_INT,
4485 };
4486
4487 AVCodec ff_h263p_encoder = {
4488     .name           = "h263p",
4489     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4490     .type           = AVMEDIA_TYPE_VIDEO,
4491     .id             = AV_CODEC_ID_H263P,
4492     .priv_data_size = sizeof(MpegEncContext),
4493     .init           = ff_MPV_encode_init,
4494     .encode2        = ff_MPV_encode_picture,
4495     .close          = ff_MPV_encode_end,
4496     .capabilities   = CODEC_CAP_SLICE_THREADS,
4497     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4498     .priv_class     = &h263p_class,
4499 };
4500
4501 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4502
4503 AVCodec ff_msmpeg4v2_encoder = {
4504     .name           = "msmpeg4v2",
4505     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4506     .type           = AVMEDIA_TYPE_VIDEO,
4507     .id             = AV_CODEC_ID_MSMPEG4V2,
4508     .priv_data_size = sizeof(MpegEncContext),
4509     .init           = ff_MPV_encode_init,
4510     .encode2        = ff_MPV_encode_picture,
4511     .close          = ff_MPV_encode_end,
4512     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4513     .priv_class     = &msmpeg4v2_class,
4514 };
4515
4516 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4517
4518 AVCodec ff_msmpeg4v3_encoder = {
4519     .name           = "msmpeg4",
4520     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4521     .type           = AVMEDIA_TYPE_VIDEO,
4522     .id             = AV_CODEC_ID_MSMPEG4V3,
4523     .priv_data_size = sizeof(MpegEncContext),
4524     .init           = ff_MPV_encode_init,
4525     .encode2        = ff_MPV_encode_picture,
4526     .close          = ff_MPV_encode_end,
4527     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4528     .priv_class     = &msmpeg4v3_class,
4529 };
4530
4531 FF_MPV_GENERIC_CLASS(wmv1)
4532
4533 AVCodec ff_wmv1_encoder = {
4534     .name           = "wmv1",
4535     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4536     .type           = AVMEDIA_TYPE_VIDEO,
4537     .id             = AV_CODEC_ID_WMV1,
4538     .priv_data_size = sizeof(MpegEncContext),
4539     .init           = ff_MPV_encode_init,
4540     .encode2        = ff_MPV_encode_picture,
4541     .close          = ff_MPV_encode_end,
4542     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4543     .priv_class     = &wmv1_class,
4544 };