git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of Libav.
   9  *
  10  * Libav is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * Libav is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with Libav; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include "libavutil/intmath.h"
  31 #include "libavutil/mathematics.h"
  32 #include "libavutil/opt.h"
  33 #include "avcodec.h"
  34 #include "dsputil.h"
  35 #include "mpegvideo.h"
  36 #include "mpegvideo_common.h"
  37 #include "h263.h"
  38 #include "mjpegenc.h"
  39 #include "msmpeg4.h"
  40 #include "faandct.h"
  41 #include "thread.h"
  42 #include "aandcttab.h"
  43 #include "flv.h"
  44 #include "mpeg4video.h"
  45 #include "internal.h"
  46 #include <limits.h>
  47
  48 //#undef NDEBUG
  49 //#include <assert.h>
  50
  51 static int encode_picture(MpegEncContext *s, int picture_number);
  52 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
  53 static int sse_mb(MpegEncContext *s);
  54 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
  55 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
  56
  57 /* enable all paranoid tests for rounding, overflows, etc... */
  58 //#define PARANOID
  59
  60 //#define DEBUG
  61
  62 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
  63 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
  64
  65 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
  66                        uint16_t (*qmat16)[2][64],
  67                        const uint16_t *quant_matrix,
  68                        int bias, int qmin, int qmax, int intra)
  69 {
  70     int qscale;
  71     int shift = 0;
  72
  73     for (qscale = qmin; qscale <= qmax; qscale++) {
  74         int i;
  75         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
  76             dsp->fdct == ff_jpeg_fdct_islow_10
  77 #ifdef FAAN_POSTSCALE
  78             || dsp->fdct == ff_faandct
  79 #endif
  80             ) {
  81             for (i = 0; i < 64; i++) {
  82                 const int j = dsp->idct_permutation[i];
  83                 /* 16 <= qscale * quant_matrix[i] <= 7905
  84                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
  85                  *             19952 <=              x  <= 249205026
  86                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
  87                  *           3444240 >= (1 << 36) / (x) >= 275 */
  88
  89                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
  90                                         (qscale * quant_matrix[j]));
  91             }
  92         } else if (dsp->fdct == ff_fdct_ifast
  93 #ifndef FAAN_POSTSCALE
  94                    || dsp->fdct == ff_faandct
  95 #endif
  96                    ) {
  97             for (i = 0; i < 64; i++) {
  98                 const int j = dsp->idct_permutation[i];
  99                 /* 16 <= qscale * quant_matrix[i] <= 7905
 100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 101                  *             19952 <=              x  <= 249205026
 102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 103                  *           3444240 >= (1 << 36) / (x) >= 275 */
 104
 105                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
 106                                         (ff_aanscales[i] * qscale *
 107                                          quant_matrix[j]));
 108             }
 109         } else {
 110             for (i = 0; i < 64; i++) {
 111                 const int j = dsp->idct_permutation[i];
 112                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 113                  * Assume x = qscale * quant_matrix[i]
 114                  * So             16 <=              x  <= 7905
 115                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 116                  * so          32768 >= (1 << 19) / (x) >= 67 */
 117                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
 118                                         (qscale * quant_matrix[j]));
 119                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 120                 //                    (qscale * quant_matrix[i]);
 121                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
 122                                        (qscale * quant_matrix[j]);
 123
 124                 if (qmat16[qscale][0][i] == 0 ||
 125                     qmat16[qscale][0][i] == 128 * 256)
 126                     qmat16[qscale][0][i] = 128 * 256 - 1;
 127                 qmat16[qscale][1][i] =
 128                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 129                                 qmat16[qscale][0][i]);
 130             }
 131         }
 132
 133         for (i = intra; i < 64; i++) {
 134             int64_t max = 8191;
 135             if (dsp->fdct == ff_fdct_ifast
 136 #ifndef FAAN_POSTSCALE
 137                 || dsp->fdct == ff_faandct
 138 #endif
 139                ) {
 140                 max = (8191LL * ff_aanscales[i]) >> 14;
 141             }
 142             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 143                 shift++;
 144             }
 145         }
 146     }
 147     if (shift) {
 148         av_log(NULL, AV_LOG_INFO,
 149                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 150                QMAT_SHIFT - shift);
 151     }
 152 }
 153
 154 static inline void update_qscale(MpegEncContext *s)
 155 {
 156     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 157                 (FF_LAMBDA_SHIFT + 7);
 158     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 159
 160     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 161                  FF_LAMBDA_SHIFT;
 162 }
 163
 164 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 165 {
 166     int i;
 167
 168     if (matrix) {
 169         put_bits(pb, 1, 1);
 170         for (i = 0; i < 64; i++) {
 171             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 172         }
 173     } else
 174         put_bits(pb, 1, 0);
 175 }
 176
 177 /**
 178  * init s->current_picture.qscale_table from s->lambda_table
 179  */
 180 void ff_init_qscale_tab(MpegEncContext *s)
 181 {
 182     int8_t * const qscale_table = s->current_picture.f.qscale_table;
 183     int i;
 184
 185     for (i = 0; i < s->mb_num; i++) {
 186         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 187         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 188         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 189                                                   s->avctx->qmax);
 190     }
 191 }
 192
 193 static void copy_picture_attributes(MpegEncContext *s,
 194                                     AVFrame *dst,
 195                                     AVFrame *src)
 196 {
 197     int i;
 198
 199     dst->pict_type              = src->pict_type;
 200     dst->quality                = src->quality;
 201     dst->coded_picture_number   = src->coded_picture_number;
 202     dst->display_picture_number = src->display_picture_number;
 203     //dst->reference              = src->reference;
 204     dst->pts                    = src->pts;
 205     dst->interlaced_frame       = src->interlaced_frame;
 206     dst->top_field_first        = src->top_field_first;
 207
 208     if (s->avctx->me_threshold) {
 209         if (!src->motion_val[0])
 210             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
 211         if (!src->mb_type)
 212             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
 213         if (!src->ref_index[0])
 214             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
 215         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
 216             av_log(s->avctx, AV_LOG_ERROR,
 217                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
 218                    src->motion_subsample_log2, dst->motion_subsample_log2);
 219
 220         memcpy(dst->mb_type, src->mb_type,
 221                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
 222
 223         for (i = 0; i < 2; i++) {
 224             int stride = ((16 * s->mb_width ) >>
 225                           src->motion_subsample_log2) + 1;
 226             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
 227
 228             if (src->motion_val[i] &&
 229                 src->motion_val[i] != dst->motion_val[i]) {
 230                 memcpy(dst->motion_val[i], src->motion_val[i],
 231                        2 * stride * height * sizeof(int16_t));
 232             }
 233             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
 234                 memcpy(dst->ref_index[i], src->ref_index[i],
 235                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
 236             }
 237         }
 238     }
 239 }
 240
 241 static void update_duplicate_context_after_me(MpegEncContext *dst,
 242                                               MpegEncContext *src)
 243 {
 244 #define COPY(a) dst->a= src->a
 245     COPY(pict_type);
 246     COPY(current_picture);
 247     COPY(f_code);
 248     COPY(b_code);
 249     COPY(qscale);
 250     COPY(lambda);
 251     COPY(lambda2);
 252     COPY(picture_in_gop_number);
 253     COPY(gop_picture_number);
 254     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 255     COPY(progressive_frame);    // FIXME don't set in encode_header
 256     COPY(partitioned_frame);    // FIXME don't set in encode_header
 257 #undef COPY
 258 }
 259
 260 /**
 261  * Set the given MpegEncContext to defaults for encoding.
 262  * the changed fields will not depend upon the prior state of the MpegEncContext.
 263  */
 264 static void MPV_encode_defaults(MpegEncContext *s)
 265 {
 266     int i;
 267     ff_MPV_common_defaults(s);
 268
 269     for (i = -16; i < 16; i++) {
 270         default_fcode_tab[i + MAX_MV] = 1;
 271     }
 272     s->me.mv_penalty = default_mv_penalty;
 273     s->fcode_tab     = default_fcode_tab;
 274 }
 275
 276 /* init video encoder */
 277 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
 278 {
 279     MpegEncContext *s = avctx->priv_data;
 280     int i;
 281     int chroma_h_shift, chroma_v_shift;
 282
 283     MPV_encode_defaults(s);
 284
 285     switch (avctx->codec_id) {
 286     case CODEC_ID_MPEG2VIDEO:
 287         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
 288             avctx->pix_fmt != PIX_FMT_YUV422P) {
 289             av_log(avctx, AV_LOG_ERROR,
 290                    "only YUV420 and YUV422 are supported\n");
 291             return -1;
 292         }
 293         break;
 294     case CODEC_ID_LJPEG:
 295         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
 296             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
 297             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
 298             avctx->pix_fmt != PIX_FMT_BGRA     &&
 299             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
 300               avctx->pix_fmt != PIX_FMT_YUV422P &&
 301               avctx->pix_fmt != PIX_FMT_YUV444P) ||
 302              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
 303             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
 304             return -1;
 305         }
 306         break;
 307     case CODEC_ID_MJPEG:
 308         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
 309             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
 310             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
 311               avctx->pix_fmt != PIX_FMT_YUV422P) ||
 312              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
 313             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 314             return -1;
 315         }
 316         break;
 317     default:
 318         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
 319             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 320             return -1;
 321         }
 322     }
 323
 324     switch (avctx->pix_fmt) {
 325     case PIX_FMT_YUVJ422P:
 326     case PIX_FMT_YUV422P:
 327         s->chroma_format = CHROMA_422;
 328         break;
 329     case PIX_FMT_YUVJ420P:
 330     case PIX_FMT_YUV420P:
 331     default:
 332         s->chroma_format = CHROMA_420;
 333         break;
 334     }
 335
 336     s->bit_rate = avctx->bit_rate;
 337     s->width    = avctx->width;
 338     s->height   = avctx->height;
 339     if (avctx->gop_size > 600 &&
 340         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 341         av_log(avctx, AV_LOG_ERROR,
 342                "Warning keyframe interval too large! reducing it ...\n");
 343         avctx->gop_size = 600;
 344     }
 345     s->gop_size     = avctx->gop_size;
 346     s->avctx        = avctx;
 347     s->flags        = avctx->flags;
 348     s->flags2       = avctx->flags2;
 349     s->max_b_frames = avctx->max_b_frames;
 350     s->codec_id     = avctx->codec->id;
 351     s->luma_elim_threshold   = avctx->luma_elim_threshold;
 352     s->chroma_elim_threshold = avctx->chroma_elim_threshold;
 353     s->strict_std_compliance = avctx->strict_std_compliance;
 354     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 355     s->mpeg_quant         = avctx->mpeg_quant;
 356     s->rtp_mode           = !!avctx->rtp_payload_size;
 357     s->intra_dc_precision = avctx->intra_dc_precision;
 358     s->user_specified_pts = AV_NOPTS_VALUE;
 359
 360     if (s->gop_size <= 1) {
 361         s->intra_only = 1;
 362         s->gop_size   = 12;
 363     } else {
 364         s->intra_only = 0;
 365     }
 366
 367     s->me_method = avctx->me_method;
 368
 369     /* Fixed QSCALE */
 370     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 371
 372     s->adaptive_quant = (s->avctx->lumi_masking ||
 373                          s->avctx->dark_masking ||
 374                          s->avctx->temporal_cplx_masking ||
 375                          s->avctx->spatial_cplx_masking  ||
 376                          s->avctx->p_masking      ||
 377                          s->avctx->border_masking ||
 378                          (s->flags & CODEC_FLAG_QP_RD)) &&
 379                         !s->fixed_qscale;
 380
 381     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
 382
 383     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 384         av_log(avctx, AV_LOG_ERROR,
 385                "a vbv buffer size is needed, "
 386                "for encoding with a maximum bitrate\n");
 387         return -1;
 388     }
 389
 390     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 391         av_log(avctx, AV_LOG_INFO,
 392                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 393     }
 394
 395     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 396         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 397         return -1;
 398     }
 399
 400     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 401         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
 402         return -1;
 403     }
 404
 405     if (avctx->rc_max_rate &&
 406         avctx->rc_max_rate == avctx->bit_rate &&
 407         avctx->rc_max_rate != avctx->rc_min_rate) {
 408         av_log(avctx, AV_LOG_INFO,
 409                "impossible bitrate constraints, this will fail\n");
 410     }
 411
 412     if (avctx->rc_buffer_size &&
 413         avctx->bit_rate * (int64_t)avctx->time_base.num >
 414             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 415         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 416         return -1;
 417     }
 418
 419     if (!s->fixed_qscale &&
 420         avctx->bit_rate * av_q2d(avctx->time_base) >
 421             avctx->bit_rate_tolerance) {
 422         av_log(avctx, AV_LOG_ERROR,
 423                "bitrate tolerance too small for bitrate\n");
 424         return -1;
 425     }
 426
 427     if (s->avctx->rc_max_rate &&
 428         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 429         (s->codec_id == CODEC_ID_MPEG1VIDEO ||
 430          s->codec_id == CODEC_ID_MPEG2VIDEO) &&
 431         90000LL * (avctx->rc_buffer_size - 1) >
 432             s->avctx->rc_max_rate * 0xFFFFLL) {
 433         av_log(avctx, AV_LOG_INFO,
 434                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 435                "specified vbv buffer is too large for the given bitrate!\n");
 436     }
 437
 438     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != CODEC_ID_MPEG4 &&
 439         s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P &&
 440         s->codec_id != CODEC_ID_FLV1) {
 441         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 442         return -1;
 443     }
 444
 445     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 446         av_log(avctx, AV_LOG_ERROR,
 447                "OBMC is only supported with simple mb decision\n");
 448         return -1;
 449     }
 450
 451     if (s->quarter_sample && s->codec_id != CODEC_ID_MPEG4) {
 452         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 453         return -1;
 454     }
 455
 456     if (s->max_b_frames                    &&
 457         s->codec_id != CODEC_ID_MPEG4      &&
 458         s->codec_id != CODEC_ID_MPEG1VIDEO &&
 459         s->codec_id != CODEC_ID_MPEG2VIDEO) {
 460         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 461         return -1;
 462     }
 463
 464     if ((s->codec_id == CODEC_ID_MPEG4 ||
 465          s->codec_id == CODEC_ID_H263  ||
 466          s->codec_id == CODEC_ID_H263P) &&
 467         (avctx->sample_aspect_ratio.num > 255 ||
 468          avctx->sample_aspect_ratio.den > 255)) {
 469         av_log(avctx, AV_LOG_ERROR,
 470                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
 471                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 472         return -1;
 473     }
 474
 475     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 476         s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO) {
 477         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 478         return -1;
 479     }
 480
 481     // FIXME mpeg2 uses that too
 482     if (s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4) {
 483         av_log(avctx, AV_LOG_ERROR,
 484                "mpeg2 style quantization not supported by codec\n");
 485         return -1;
 486     }
 487
 488     if ((s->flags & CODEC_FLAG_CBP_RD) && !avctx->trellis) {
 489         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 490         return -1;
 491     }
 492
 493     if ((s->flags & CODEC_FLAG_QP_RD) &&
 494         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 495         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 496         return -1;
 497     }
 498
 499     if (s->avctx->scenechange_threshold < 1000000000 &&
 500         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
 501         av_log(avctx, AV_LOG_ERROR,
 502                "closed gop with scene change detection are not supported yet, "
 503                "set threshold to 1000000000\n");
 504         return -1;
 505     }
 506
 507     if (s->flags & CODEC_FLAG_LOW_DELAY) {
 508         if (s->codec_id != CODEC_ID_MPEG2VIDEO) {
 509             av_log(avctx, AV_LOG_ERROR,
 510                   "low delay forcing is only available for mpeg2\n");
 511             return -1;
 512         }
 513         if (s->max_b_frames != 0) {
 514             av_log(avctx, AV_LOG_ERROR,
 515                    "b frames cannot be used with low delay\n");
 516             return -1;
 517         }
 518     }
 519
 520     if (s->q_scale_type == 1) {
 521         if (avctx->qmax > 12) {
 522             av_log(avctx, AV_LOG_ERROR,
 523                    "non linear quant only supports qmax <= 12 currently\n");
 524             return -1;
 525         }
 526     }
 527
 528     if (s->avctx->thread_count > 1         &&
 529         s->codec_id != CODEC_ID_MPEG4      &&
 530         s->codec_id != CODEC_ID_MPEG1VIDEO &&
 531         s->codec_id != CODEC_ID_MPEG2VIDEO &&
 532         (s->codec_id != CODEC_ID_H263P)) {
 533         av_log(avctx, AV_LOG_ERROR,
 534                "multi threaded encoding not supported by codec\n");
 535         return -1;
 536     }
 537
 538     if (s->avctx->thread_count < 1) {
 539         av_log(avctx, AV_LOG_ERROR,
 540                "automatic thread number detection not supported by codec,"
 541                "patch welcome\n");
 542         return -1;
 543     }
 544
 545     if (s->avctx->thread_count > 1)
 546         s->rtp_mode = 1;
 547
 548     if (!avctx->time_base.den || !avctx->time_base.num) {
 549         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 550         return -1;
 551     }
 552
 553     i = (INT_MAX / 2 + 128) >> 8;
 554     if (avctx->me_threshold >= i) {
 555         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
 556                i - 1);
 557         return -1;
 558     }
 559     if (avctx->mb_threshold >= i) {
 560         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
 561                i - 1);
 562         return -1;
 563     }
 564
 565     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 566         av_log(avctx, AV_LOG_INFO,
 567                "notice: b_frame_strategy only affects the first pass\n");
 568         avctx->b_frame_strategy = 0;
 569     }
 570
 571     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 572     if (i > 1) {
 573         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 574         avctx->time_base.den /= i;
 575         avctx->time_base.num /= i;
 576         //return -1;
 577     }
 578
 579     if (s->mpeg_quant || s->codec_id == CODEC_ID_MPEG1VIDEO ||
 580         s->codec_id == CODEC_ID_MPEG2VIDEO || s->codec_id == CODEC_ID_MJPEG) {
 581         // (a + x * 3 / 8) / x
 582         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 583         s->inter_quant_bias = 0;
 584     } else {
 585         s->intra_quant_bias = 0;
 586         // (a - x / 4) / x
 587         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 588     }
 589
 590     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 591         s->intra_quant_bias = avctx->intra_quant_bias;
 592     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 593         s->inter_quant_bias = avctx->inter_quant_bias;
 594
 595     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
 596                                   &chroma_v_shift);
 597
 598     if (avctx->codec_id == CODEC_ID_MPEG4 &&
 599         s->avctx->time_base.den > (1 << 16) - 1) {
 600         av_log(avctx, AV_LOG_ERROR,
 601                "timebase %d/%d not supported by MPEG 4 standard, "
 602                "the maximum admitted value for the timebase denominator "
 603                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 604                (1 << 16) - 1);
 605         return -1;
 606     }
 607     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 608
 609     switch (avctx->codec->id) {
 610     case CODEC_ID_MPEG1VIDEO:
 611         s->out_format = FMT_MPEG1;
 612         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 613         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 614         break;
 615     case CODEC_ID_MPEG2VIDEO:
 616         s->out_format = FMT_MPEG1;
 617         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 618         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 619         s->rtp_mode   = 1;
 620         break;
 621     case CODEC_ID_LJPEG:
 622     case CODEC_ID_MJPEG:
 623         s->out_format = FMT_MJPEG;
 624         s->intra_only = 1; /* force intra only for jpeg */
 625         if (avctx->codec->id == CODEC_ID_LJPEG &&
 626             avctx->pix_fmt   == PIX_FMT_BGRA) {
 627             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
 628             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
 629             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
 630         } else {
 631             s->mjpeg_vsample[0] = 2;
 632             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
 633             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
 634             s->mjpeg_hsample[0] = 2;
 635             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
 636             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
 637         }
 638         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
 639             ff_mjpeg_encode_init(s) < 0)
 640             return -1;
 641         avctx->delay = 0;
 642         s->low_delay = 1;
 643         break;
 644     case CODEC_ID_H261:
 645         if (!CONFIG_H261_ENCODER)
 646             return -1;
 647         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 648             av_log(avctx, AV_LOG_ERROR,
 649                    "The specified picture size of %dx%d is not valid for the "
 650                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 651                     s->width, s->height);
 652             return -1;
 653         }
 654         s->out_format = FMT_H261;
 655         avctx->delay  = 0;
 656         s->low_delay  = 1;
 657         break;
 658     case CODEC_ID_H263:
 659         if (!CONFIG_H263_ENCODER)
 660         return -1;
 661         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 662                              s->width, s->height) == 8) {
 663             av_log(avctx, AV_LOG_INFO,
 664                    "The specified picture size of %dx%d is not valid for "
 665                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 666                    "352x288, 704x576, and 1408x1152."
 667                    "Try H.263+.\n", s->width, s->height);
 668             return -1;
 669         }
 670         s->out_format = FMT_H263;
 671         avctx->delay  = 0;
 672         s->low_delay  = 1;
 673         break;
 674     case CODEC_ID_H263P:
 675         s->out_format = FMT_H263;
 676         s->h263_plus  = 1;
 677         /* Fx */
 678         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 679         s->modified_quant  = s->h263_aic;
 680         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 681         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 682
 683         /* /Fx */
 684         /* These are just to be sure */
 685         avctx->delay = 0;
 686         s->low_delay = 1;
 687         break;
 688     case CODEC_ID_FLV1:
 689         s->out_format      = FMT_H263;
 690         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 691         s->unrestricted_mv = 1;
 692         s->rtp_mode  = 0; /* don't allow GOB */
 693         avctx->delay = 0;
 694         s->low_delay = 1;
 695         break;
 696     case CODEC_ID_RV10:
 697         s->out_format = FMT_H263;
 698         avctx->delay  = 0;
 699         s->low_delay  = 1;
 700         break;
 701     case CODEC_ID_RV20:
 702         s->out_format      = FMT_H263;
 703         avctx->delay       = 0;
 704         s->low_delay       = 1;
 705         s->modified_quant  = 1;
 706         s->h263_aic        = 1;
 707         s->h263_plus       = 1;
 708         s->loop_filter     = 1;
 709         s->unrestricted_mv = 0;
 710         break;
 711     case CODEC_ID_MPEG4:
 712         s->out_format      = FMT_H263;
 713         s->h263_pred       = 1;
 714         s->unrestricted_mv = 1;
 715         s->low_delay       = s->max_b_frames ? 0 : 1;
 716         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 717         break;
 718     case CODEC_ID_MSMPEG4V2:
 719         s->out_format      = FMT_H263;
 720         s->h263_pred       = 1;
 721         s->unrestricted_mv = 1;
 722         s->msmpeg4_version = 2;
 723         avctx->delay       = 0;
 724         s->low_delay       = 1;
 725         break;
 726     case CODEC_ID_MSMPEG4V3:
 727         s->out_format        = FMT_H263;
 728         s->h263_pred         = 1;
 729         s->unrestricted_mv   = 1;
 730         s->msmpeg4_version   = 3;
 731         s->flipflop_rounding = 1;
 732         avctx->delay         = 0;
 733         s->low_delay         = 1;
 734         break;
 735     case CODEC_ID_WMV1:
 736         s->out_format        = FMT_H263;
 737         s->h263_pred         = 1;
 738         s->unrestricted_mv   = 1;
 739         s->msmpeg4_version   = 4;
 740         s->flipflop_rounding = 1;
 741         avctx->delay         = 0;
 742         s->low_delay         = 1;
 743         break;
 744     case CODEC_ID_WMV2:
 745         s->out_format        = FMT_H263;
 746         s->h263_pred         = 1;
 747         s->unrestricted_mv   = 1;
 748         s->msmpeg4_version   = 5;
 749         s->flipflop_rounding = 1;
 750         avctx->delay         = 0;
 751         s->low_delay         = 1;
 752         break;
 753     default:
 754         return -1;
 755     }
 756
 757     avctx->has_b_frames = !s->low_delay;
 758
 759     s->encoding = 1;
 760
 761     s->progressive_frame    =
 762     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 763                                                 CODEC_FLAG_INTERLACED_ME) ||
 764                                 s->alternate_scan);
 765
 766     /* init */
 767     if (ff_MPV_common_init(s) < 0)
 768         return -1;
 769
 770     if (!s->dct_quantize)
 771         s->dct_quantize = ff_dct_quantize_c;
 772     if (!s->denoise_dct)
 773         s->denoise_dct  = denoise_dct_c;
 774     s->fast_dct_quantize = s->dct_quantize;
 775     if (avctx->trellis)
 776         s->dct_quantize  = dct_quantize_trellis_c;
 777
 778     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 779         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 780
 781     s->quant_precision = 5;
 782
 783     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
 784     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
 785
 786     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 787         ff_h261_encode_init(s);
 788     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 789         ff_h263_encode_init(s);
 790     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 791         ff_msmpeg4_encode_init(s);
 792     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 793         && s->out_format == FMT_MPEG1)
 794         ff_mpeg1_encode_init(s);
 795
 796     /* init q matrix */
 797     for (i = 0; i < 64; i++) {
 798         int j = s->dsp.idct_permutation[i];
 799         if (CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4 &&
 800             s->mpeg_quant) {
 801             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 802             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 803         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 804             s->intra_matrix[j] =
 805             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 806         } else {
 807             /* mpeg1/2 */
 808             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 809             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 810         }
 811         if (s->avctx->intra_matrix)
 812             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 813         if (s->avctx->inter_matrix)
 814             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 815     }
 816
 817     /* precompute matrix */
 818     /* for mjpeg, we do include qscale in the matrix */
 819     if (s->out_format != FMT_MJPEG) {
 820         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
 821                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 822                           31, 1);
 823         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
 824                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 825                           31, 0);
 826     }
 827
 828     if (ff_rate_control_init(s) < 0)
 829         return -1;
 830
 831     return 0;
 832 }
 833
 834 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
 835 {
 836     MpegEncContext *s = avctx->priv_data;
 837
 838     ff_rate_control_uninit(s);
 839
 840     ff_MPV_common_end(s);
 841     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
 842         s->out_format == FMT_MJPEG)
 843         ff_mjpeg_encode_close(s);
 844
 845     av_freep(&avctx->extradata);
 846
 847     return 0;
 848 }
 849
 850 static int get_sae(uint8_t *src, int ref, int stride)
 851 {
 852     int x,y;
 853     int acc = 0;
 854
 855     for (y = 0; y < 16; y++) {
 856         for (x = 0; x < 16; x++) {
 857             acc += FFABS(src[x + y * stride] - ref);
 858         }
 859     }
 860
 861     return acc;
 862 }
 863
 864 static int get_intra_count(MpegEncContext *s, uint8_t *src,
 865                            uint8_t *ref, int stride)
 866 {
 867     int x, y, w, h;
 868     int acc = 0;
 869
 870     w = s->width  & ~15;
 871     h = s->height & ~15;
 872
 873     for (y = 0; y < h; y += 16) {
 874         for (x = 0; x < w; x += 16) {
 875             int offset = x + y * stride;
 876             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
 877                                      16);
 878             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
 879             int sae  = get_sae(src + offset, mean, stride);
 880
 881             acc += sae + 500 < sad;
 882         }
 883     }
 884     return acc;
 885 }
 886
 887
 888 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
 889 {
 890     AVFrame *pic = NULL;
 891     int64_t pts;
 892     int i;
 893     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
 894                                                  (s->low_delay ? 0 : 1);
 895     int direct = 1;
 896
 897     if (pic_arg) {
 898         pts = pic_arg->pts;
 899         pic_arg->display_picture_number = s->input_picture_number++;
 900
 901         if (pts != AV_NOPTS_VALUE) {
 902             if (s->user_specified_pts != AV_NOPTS_VALUE) {
 903                 int64_t time = pts;
 904                 int64_t last = s->user_specified_pts;
 905
 906                 if (time <= last) {
 907                     av_log(s->avctx, AV_LOG_ERROR,
 908                            "Error, Invalid timestamp=%"PRId64", "
 909                            "last=%"PRId64"\n", pts, s->user_specified_pts);
 910                     return -1;
 911                 }
 912
 913                 if (!s->low_delay && pic_arg->display_picture_number == 1)
 914                     s->dts_delta = time - last;
 915             }
 916             s->user_specified_pts = pts;
 917         } else {
 918             if (s->user_specified_pts != AV_NOPTS_VALUE) {
 919                 s->user_specified_pts =
 920                 pts = s->user_specified_pts + 1;
 921                 av_log(s->avctx, AV_LOG_INFO,
 922                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
 923                        pts);
 924             } else {
 925                 pts = pic_arg->display_picture_number;
 926             }
 927         }
 928     }
 929
 930   if (pic_arg) {
 931     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
 932         direct = 0;
 933     if (pic_arg->linesize[0] != s->linesize)
 934         direct = 0;
 935     if (pic_arg->linesize[1] != s->uvlinesize)
 936         direct = 0;
 937     if (pic_arg->linesize[2] != s->uvlinesize)
 938         direct = 0;
 939
 940     //av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0],
 941     //       pic_arg->linesize[1], s->linesize, s->uvlinesize);
 942
 943     if (direct) {
 944         i = ff_find_unused_picture(s, 1);
 945         if (i < 0)
 946             return i;
 947
 948         pic = (AVFrame *) &s->picture[i];
 949         pic->reference = 3;
 950
 951         for (i = 0; i < 4; i++) {
 952             pic->data[i]     = pic_arg->data[i];
 953             pic->linesize[i] = pic_arg->linesize[i];
 954         }
 955         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
 956             return -1;
 957         }
 958     } else {
 959         i = ff_find_unused_picture(s, 0);
 960         if (i < 0)
 961             return i;
 962
 963         pic = (AVFrame *) &s->picture[i];
 964         pic->reference = 3;
 965
 966         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
 967             return -1;
 968         }
 969
 970         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
 971             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
 972             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
 973             // empty
 974         } else {
 975             int h_chroma_shift, v_chroma_shift;
 976             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
 977                                           &v_chroma_shift);
 978
 979             for (i = 0; i < 3; i++) {
 980                 int src_stride = pic_arg->linesize[i];
 981                 int dst_stride = i ? s->uvlinesize : s->linesize;
 982                 int h_shift = i ? h_chroma_shift : 0;
 983                 int v_shift = i ? v_chroma_shift : 0;
 984                 int w = s->width  >> h_shift;
 985                 int h = s->height >> v_shift;
 986                 uint8_t *src = pic_arg->data[i];
 987                 uint8_t *dst = pic->data[i];
 988
 989                 if (!s->avctx->rc_buffer_size)
 990                     dst += INPLACE_OFFSET;
 991
 992                 if (src_stride == dst_stride)
 993                     memcpy(dst, src, src_stride * h);
 994                 else {
 995                     while (h--) {
 996                         memcpy(dst, src, w);
 997                         dst += dst_stride;
 998                         src += src_stride;
 999                     }
1000                 }
1001             }
1002         }
1003     }
1004     copy_picture_attributes(s, pic, pic_arg);
1005     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1006   }
1007
1008     /* shift buffer entries */
1009     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1010         s->input_picture[i - 1] = s->input_picture[i];
1011
1012     s->input_picture[encoding_delay] = (Picture*) pic;
1013
1014     return 0;
1015 }
1016
1017 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1018 {
1019     int x, y, plane;
1020     int score = 0;
1021     int64_t score64 = 0;
1022
1023     for (plane = 0; plane < 3; plane++) {
1024         const int stride = p->f.linesize[plane];
1025         const int bw = plane ? 1 : 2;
1026         for (y = 0; y < s->mb_height * bw; y++) {
1027             for (x = 0; x < s->mb_width * bw; x++) {
1028                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1029                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1030                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1031                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1032
1033                 switch (s->avctx->frame_skip_exp) {
1034                 case 0: score    =  FFMAX(score, v);          break;
1035                 case 1: score   += FFABS(v);                  break;
1036                 case 2: score   += v * v;                     break;
1037                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1038                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1039                 }
1040             }
1041         }
1042     }
1043
1044     if (score)
1045         score64 = score;
1046
1047     if (score64 < s->avctx->frame_skip_threshold)
1048         return 1;
1049     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1050         return 1;
1051     return 0;
1052 }
1053
1054 static int estimate_best_b_count(MpegEncContext *s)
1055 {
1056     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1057     AVCodecContext *c = avcodec_alloc_context3(NULL);
1058     AVFrame input[FF_MAX_B_FRAMES + 2];
1059     const int scale = s->avctx->brd_scale;
1060     int i, j, out_size, p_lambda, b_lambda, lambda2;
1061     int outbuf_size  = s->width * s->height; // FIXME
1062     uint8_t *outbuf  = av_malloc(outbuf_size);
1063     int64_t best_rd  = INT64_MAX;
1064     int best_b_count = -1;
1065
1066     assert(scale >= 0 && scale <= 3);
1067
1068     //emms_c();
1069     //s->next_picture_ptr->quality;
1070     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1071     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1072     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1073     if (!b_lambda) // FIXME we should do this somewhere else
1074         b_lambda = p_lambda;
1075     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1076                FF_LAMBDA_SHIFT;
1077
1078     c->width        = s->width  >> scale;
1079     c->height       = s->height >> scale;
1080     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1081                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1082     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1083     c->mb_decision  = s->avctx->mb_decision;
1084     c->me_cmp       = s->avctx->me_cmp;
1085     c->mb_cmp       = s->avctx->mb_cmp;
1086     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1087     c->pix_fmt      = PIX_FMT_YUV420P;
1088     c->time_base    = s->avctx->time_base;
1089     c->max_b_frames = s->max_b_frames;
1090
1091     if (avcodec_open2(c, codec, NULL) < 0)
1092         return -1;
1093
1094     for (i = 0; i < s->max_b_frames + 2; i++) {
1095         int ysize = c->width * c->height;
1096         int csize = (c->width / 2) * (c->height / 2);
1097         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1098                                                 s->next_picture_ptr;
1099
1100         avcodec_get_frame_defaults(&input[i]);
1101         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1102         input[i].data[1]     = input[i].data[0] + ysize;
1103         input[i].data[2]     = input[i].data[1] + csize;
1104         input[i].linesize[0] = c->width;
1105         input[i].linesize[1] =
1106         input[i].linesize[2] = c->width / 2;
1107
1108         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1109             pre_input = *pre_input_ptr;
1110
1111             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1112                 pre_input.f.data[0] += INPLACE_OFFSET;
1113                 pre_input.f.data[1] += INPLACE_OFFSET;
1114                 pre_input.f.data[2] += INPLACE_OFFSET;
1115             }
1116
1117             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1118                                  pre_input.f.data[0], pre_input.f.linesize[0],
1119                                  c->width,      c->height);
1120             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1121                                  pre_input.f.data[1], pre_input.f.linesize[1],
1122                                  c->width >> 1, c->height >> 1);
1123             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1124                                  pre_input.f.data[2], pre_input.f.linesize[2],
1125                                  c->width >> 1, c->height >> 1);
1126         }
1127     }
1128
1129     for (j = 0; j < s->max_b_frames + 1; j++) {
1130         int64_t rd = 0;
1131
1132         if (!s->input_picture[j])
1133             break;
1134
1135         c->error[0] = c->error[1] = c->error[2] = 0;
1136
1137         input[0].pict_type = AV_PICTURE_TYPE_I;
1138         input[0].quality   = 1 * FF_QP2LAMBDA;
1139         out_size           = avcodec_encode_video(c, outbuf,
1140                                                   outbuf_size, &input[0]);
1141         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1142
1143         for (i = 0; i < s->max_b_frames + 1; i++) {
1144             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1145
1146             input[i + 1].pict_type = is_p ?
1147                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1148             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1149             out_size = avcodec_encode_video(c, outbuf, outbuf_size,
1150                                             &input[i + 1]);
1151             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1152         }
1153
1154         /* get the delayed frames */
1155         while (out_size) {
1156             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
1157             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1158         }
1159
1160         rd += c->error[0] + c->error[1] + c->error[2];
1161
1162         if (rd < best_rd) {
1163             best_rd = rd;
1164             best_b_count = j;
1165         }
1166     }
1167
1168     av_freep(&outbuf);
1169     avcodec_close(c);
1170     av_freep(&c);
1171
1172     for (i = 0; i < s->max_b_frames + 2; i++) {
1173         av_freep(&input[i].data[0]);
1174     }
1175
1176     return best_b_count;
1177 }
1178
1179 static int select_input_picture(MpegEncContext *s)
1180 {
1181     int i;
1182
1183     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1184         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1185     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1186
1187     /* set next picture type & ordering */
1188     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1189         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1190             s->next_picture_ptr == NULL || s->intra_only) {
1191             s->reordered_input_picture[0] = s->input_picture[0];
1192             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1193             s->reordered_input_picture[0]->f.coded_picture_number =
1194                 s->coded_picture_number++;
1195         } else {
1196             int b_frames;
1197
1198             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1199                 if (s->picture_in_gop_number < s->gop_size &&
1200                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1201                     // FIXME check that te gop check above is +-1 correct
1202                     //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n",
1203                     //       s->input_picture[0]->f.data[0],
1204                     //       s->input_picture[0]->pts);
1205
1206                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1207                         for (i = 0; i < 4; i++)
1208                             s->input_picture[0]->f.data[i] = NULL;
1209                         s->input_picture[0]->f.type = 0;
1210                     } else {
1211                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1212                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1213
1214                         s->avctx->release_buffer(s->avctx,
1215                                                  (AVFrame *) s->input_picture[0]);
1216                     }
1217
1218                     emms_c();
1219                     ff_vbv_update(s, 0);
1220
1221                     goto no_output_pic;
1222                 }
1223             }
1224
1225             if (s->flags & CODEC_FLAG_PASS2) {
1226                 for (i = 0; i < s->max_b_frames + 1; i++) {
1227                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1228
1229                     if (pict_num >= s->rc_context.num_entries)
1230                         break;
1231                     if (!s->input_picture[i]) {
1232                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1233                         break;
1234                     }
1235
1236                     s->input_picture[i]->f.pict_type =
1237                         s->rc_context.entry[pict_num].new_pict_type;
1238                 }
1239             }
1240
1241             if (s->avctx->b_frame_strategy == 0) {
1242                 b_frames = s->max_b_frames;
1243                 while (b_frames && !s->input_picture[b_frames])
1244                     b_frames--;
1245             } else if (s->avctx->b_frame_strategy == 1) {
1246                 for (i = 1; i < s->max_b_frames + 1; i++) {
1247                     if (s->input_picture[i] &&
1248                         s->input_picture[i]->b_frame_score == 0) {
1249                         s->input_picture[i]->b_frame_score =
1250                             get_intra_count(s,
1251                                             s->input_picture[i    ]->f.data[0],
1252                                             s->input_picture[i - 1]->f.data[0],
1253                                             s->linesize) + 1;
1254                     }
1255                 }
1256                 for (i = 0; i < s->max_b_frames + 1; i++) {
1257                     if (s->input_picture[i] == NULL ||
1258                         s->input_picture[i]->b_frame_score - 1 >
1259                             s->mb_num / s->avctx->b_sensitivity)
1260                         break;
1261                 }
1262
1263                 b_frames = FFMAX(0, i - 1);
1264
1265                 /* reset scores */
1266                 for (i = 0; i < b_frames + 1; i++) {
1267                     s->input_picture[i]->b_frame_score = 0;
1268                 }
1269             } else if (s->avctx->b_frame_strategy == 2) {
1270                 b_frames = estimate_best_b_count(s);
1271             } else {
1272                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1273                 b_frames = 0;
1274             }
1275
1276             emms_c();
1277             //static int b_count = 0;
1278             //b_count += b_frames;
1279             //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1280
1281             for (i = b_frames - 1; i >= 0; i--) {
1282                 int type = s->input_picture[i]->f.pict_type;
1283                 if (type && type != AV_PICTURE_TYPE_B)
1284                     b_frames = i;
1285             }
1286             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1287                 b_frames == s->max_b_frames) {
1288                 av_log(s->avctx, AV_LOG_ERROR,
1289                        "warning, too many b frames in a row\n");
1290             }
1291
1292             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1293                 if ((s->flags2 & CODEC_FLAG2_STRICT_GOP) &&
1294                     s->gop_size > s->picture_in_gop_number) {
1295                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1296                 } else {
1297                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1298                         b_frames = 0;
1299                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1300                 }
1301             }
1302
1303             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1304                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1305                 b_frames--;
1306
1307             s->reordered_input_picture[0] = s->input_picture[b_frames];
1308             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1309                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1310             s->reordered_input_picture[0]->f.coded_picture_number =
1311                 s->coded_picture_number++;
1312             for (i = 0; i < b_frames; i++) {
1313                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1314                 s->reordered_input_picture[i + 1]->f.pict_type =
1315                     AV_PICTURE_TYPE_B;
1316                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1317                     s->coded_picture_number++;
1318             }
1319         }
1320     }
1321 no_output_pic:
1322     if (s->reordered_input_picture[0]) {
1323         s->reordered_input_picture[0]->f.reference =
1324            s->reordered_input_picture[0]->f.pict_type !=
1325                AV_PICTURE_TYPE_B ? 3 : 0;
1326
1327         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1328
1329         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1330             s->avctx->rc_buffer_size) {
1331             // input is a shared pix, so we can't modifiy it -> alloc a new
1332             // one & ensure that the shared one is reuseable
1333
1334             Picture *pic;
1335             int i = ff_find_unused_picture(s, 0);
1336             if (i < 0)
1337                 return i;
1338             pic = &s->picture[i];
1339
1340             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1341             if (ff_alloc_picture(s, pic, 0) < 0) {
1342                 return -1;
1343             }
1344
1345             /* mark us unused / free shared pic */
1346             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1347                 s->avctx->release_buffer(s->avctx,
1348                                          (AVFrame *) s->reordered_input_picture[0]);
1349             for (i = 0; i < 4; i++)
1350                 s->reordered_input_picture[0]->f.data[i] = NULL;
1351             s->reordered_input_picture[0]->f.type = 0;
1352
1353             copy_picture_attributes(s, (AVFrame *) pic,
1354                                     (AVFrame *) s->reordered_input_picture[0]);
1355
1356             s->current_picture_ptr = pic;
1357         } else {
1358             // input is not a shared pix -> reuse buffer for current_pix
1359
1360             assert(s->reordered_input_picture[0]->f.type ==
1361                        FF_BUFFER_TYPE_USER ||
1362                    s->reordered_input_picture[0]->f.type ==
1363                        FF_BUFFER_TYPE_INTERNAL);
1364
1365             s->current_picture_ptr = s->reordered_input_picture[0];
1366             for (i = 0; i < 4; i++) {
1367                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1368             }
1369         }
1370         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1371
1372         s->picture_number = s->new_picture.f.display_picture_number;
1373         //printf("dpn:%d\n", s->picture_number);
1374     } else {
1375         memset(&s->new_picture, 0, sizeof(Picture));
1376     }
1377     return 0;
1378 }
1379
      /**
       * Encode callback: queue pic_arg and, if a picture is ready, code it into pkt.
       *
       * pic_arg is passed on to load_input_picture(), which accepts NULL (no new
       * picture is queued in that case). select_input_picture() then decides
       * which queued picture, if any, is coded by this call.
       *
       * @param avctx      codec context; priv_data is the MpegEncContext
       * @param pkt        output packet; a buffer is allocated when pkt->data is NULL
       * @param pic_arg    input frame or NULL
       * @param got_packet set to 1 if pkt->size is non-zero on return, else to 0
       * @return 0 on success, the negative result of ff_alloc_packet() or -1
       *         on failure
       */
 1380 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
 1381                           const AVFrame *pic_arg, int *got_packet)
 1382 {
 1383     MpegEncContext *s = avctx->priv_data;
 1384     int i, stuffing_count, ret;
 1385     int context_count = s->slice_context_count;
 1386
          /* allocate the output buffer unless the caller supplied one */
 1387     if (!pkt->data &&
 1388         (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
 1389         return ret;
 1390
          /* give every slice context the part of the packet buffer that is
           * proportional to its range of macroblock rows */
 1391     for (i = 0; i < context_count; i++) {
 1392         int start_y = s->thread_context[i]->start_mb_y;
 1393         int   end_y = s->thread_context[i]->  end_mb_y;
 1394         int h       = s->mb_height;
 1395         uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
 1396         uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
 1397
 1398         init_put_bits(&s->thread_context[i]->pb, start, end - start);
 1399     }
 1400
 1401     s->picture_in_gop_number++;
 1402
 1403     if (load_input_picture(s, pic_arg) < 0)
 1404         return -1;
 1405
 1406     if (select_input_picture(s) < 0) {
 1407         return -1;
 1408     }
 1409
 1410     /* output? */
 1411     if (s->new_picture.f.data[0]) {
 1412         s->pict_type = s->new_picture.f.pict_type;
 1413         //emms_c();
 1414         //printf("qs:%f %f %d\n", s->new_picture.quality,
 1415         //       s->current_picture.quality, s->qscale);
 1416         ff_MPV_frame_start(s, avctx);
          /* re-entered from the rate-control check below when the coded
           * picture turned out too large */
 1417 vbv_retry:
 1418         if (encode_picture(s, s->picture_number) < 0)
 1419             return -1;
 1420
              /* export the per-picture statistics to the codec context */
 1421         avctx->header_bits = s->header_bits;
 1422         avctx->mv_bits     = s->mv_bits;
 1423         avctx->misc_bits   = s->misc_bits;
 1424         avctx->i_tex_bits  = s->i_tex_bits;
 1425         avctx->p_tex_bits  = s->p_tex_bits;
 1426         avctx->i_count     = s->i_count;
 1427         // FIXME f/b_count in avctx
 1428         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
 1429         avctx->skip_count  = s->skip_count;
 1430
 1431         ff_MPV_frame_end(s);
 1432
 1433         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
 1434             ff_mjpeg_encode_picture_trailer(s);
 1435
              /* With a rate-control buffer configured: if the picture used more
               * than max_size bits and lambda is still below lmax, raise lambda
               * (and the per-macroblock lambda table), undo the state changes
               * listed below, reset the bit writers and code the picture again. */
 1436         if (avctx->rc_buffer_size) {
 1437             RateControlContext *rcc = &s->rc_context;
 1438             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
 1439
 1440             if (put_bits_count(&s->pb) > max_size &&
 1441                 s->lambda < s->avctx->lmax) {
 1442                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
 1443                                        (s->qscale + 1) / s->qscale);
 1444                 if (s->adaptive_quant) {
 1445                     int i;
 1446                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
 1447                         s->lambda_table[i] =
 1448                             FFMAX(s->lambda_table[i] + 1,
 1449                                   s->lambda_table[i] * (s->qscale + 1) /
 1450                                   s->qscale);
 1451                 }
 1452                 s->mb_skipped = 0;        // done in MPV_frame_start()
 1453                 // done in encode_picture() so we must undo it
 1454                 if (s->pict_type == AV_PICTURE_TYPE_P) {
 1455                     if (s->flipflop_rounding          ||
 1456                         s->codec_id == CODEC_ID_H263P ||
 1457                         s->codec_id == CODEC_ID_MPEG4)
 1458                         s->no_rounding ^= 1;
 1459                 }
 1460                 if (s->pict_type != AV_PICTURE_TYPE_B) {
 1461                     s->time_base       = s->last_time_base;
 1462                     s->last_non_b_time = s->time - s->pp_time;
 1463                 }
 1464                 //av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
                      /* rewind every slice's bit writer to the start of its buffer */
 1465                 for (i = 0; i < context_count; i++) {
 1466                     PutBitContext *pb = &s->thread_context[i]->pb;
 1467                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
 1468                 }
 1469                 goto vbv_retry;
 1470             }
 1471
 1472             assert(s->avctx->rc_max_rate);
 1473         }
 1474
 1475         if (s->flags & CODEC_FLAG_PASS1)
 1476             ff_write_pass1_stats(s);
 1477
              /* store the picture's error values and add them to the context totals */
 1478         for (i = 0; i < 4; i++) {
 1479             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
 1480             avctx->error[i] += s->current_picture_ptr->f.error[i];
 1481         }
 1482
 1483         if (s->flags & CODEC_FLAG_PASS1)
 1484             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
 1485                    avctx->i_tex_bits + avctx->p_tex_bits ==
 1486                        put_bits_count(&s->pb));
 1487         flush_put_bits(&s->pb);
 1488         s->frame_bits  = put_bits_count(&s->pb);
 1489
              /* ff_vbv_update() returns the number of stuffing bytes to append;
               * at least stuffing_count + 50 bytes must remain in the buffer */
 1490         stuffing_count = ff_vbv_update(s, s->frame_bits);
 1491         if (stuffing_count) {
 1492             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
 1493                     stuffing_count + 50) {
 1494                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
 1495                 return -1;
 1496             }
 1497
                  /* MPEG-1/2: zero bytes; MPEG-4: the 4-byte code 0x000001C3
                   * followed by 0xFF bytes; other codecs: only an error is logged */
 1498             switch (s->codec_id) {
 1499             case CODEC_ID_MPEG1VIDEO:
 1500             case CODEC_ID_MPEG2VIDEO:
 1501                 while (stuffing_count--) {
 1502                     put_bits(&s->pb, 8, 0);
 1503                 }
 1504             break;
 1505             case CODEC_ID_MPEG4:
 1506                 put_bits(&s->pb, 16, 0);
 1507                 put_bits(&s->pb, 16, 0x1C3);
 1508                 stuffing_count -= 4;
 1509                 while (stuffing_count--) {
 1510                     put_bits(&s->pb, 8, 0xFF);
 1511                 }
 1512             break;
 1513             default:
 1514                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
 1515             }
 1516             flush_put_bits(&s->pb);
 1517             s->frame_bits  = put_bits_count(&s->pb);
 1518         }
 1519
 1520         /* update mpeg1/2 vbv_delay for CBR */
 1521         if (s->avctx->rc_max_rate                          &&
 1522             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 1523             s->out_format == FMT_MPEG1                     &&
 1524             90000LL * (avctx->rc_buffer_size - 1) <=
 1525                 s->avctx->rc_max_rate * 0xFFFFLL) {
 1526             int vbv_delay, min_delay;
 1527             double inbits  = s->avctx->rc_max_rate *
 1528                              av_q2d(s->avctx->time_base);
 1529             int    minbits = s->frame_bits - 8 *
 1530                              (s->vbv_delay_ptr - s->pb.buf - 1);
 1531             double bits    = s->rc_context.buffer_index + minbits - inbits;
 1532
 1533             if (bits < 0)
 1534                 av_log(s->avctx, AV_LOG_ERROR,
 1535                        "Internal error, negative bits\n");
 1536
 1537             assert(s->repeat_first_field == 0);
 1538
                  /* both delays are expressed in 1/90000 s units */
 1539             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
 1540             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
 1541                         s->avctx->rc_max_rate;
 1542
 1543             vbv_delay = FFMAX(vbv_delay, min_delay);
 1544
 1545             assert(vbv_delay < 0xFFFF);
 1546
                  /* patch the 16-bit value into the already written bytes:
                   * bits 15..13 -> low 3 bits of byte 0, bits 12..5 -> byte 1,
                   * bits 4..0 -> high 5 bits of byte 2 */
 1547             s->vbv_delay_ptr[0] &= 0xF8;
 1548             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
 1549             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
 1550             s->vbv_delay_ptr[2] &= 0x07;
 1551             s->vbv_delay_ptr[2] |= vbv_delay << 3;
                  /* NOTE(review): the factor 300 presumably converts 90 kHz
                   * ticks to 27 MHz units - confirm against the AVCodecContext
                   * documentation */
 1552             avctx->vbv_delay     = vbv_delay * 300;
 1553         }
 1554         s->total_bits     += s->frame_bits;
 1555         avctx->frame_bits  = s->frame_bits;
 1556
              /* Timestamps: without low_delay the first coded picture gets
               * dts = pts - dts_delta and later ones the pts saved by the
               * previous call; with low_delay dts equals pts. */
 1557         pkt->pts = s->current_picture.f.pts;
 1558         if (!s->low_delay) {
 1559             if (!s->current_picture.f.coded_picture_number)
 1560                 pkt->dts = pkt->pts - s->dts_delta;
 1561             else
 1562                 pkt->dts = s->reordered_pts;
 1563             s->reordered_pts = s->input_picture[0]->f.pts;
 1564         } else
 1565             pkt->dts = pkt->pts;
 1566         if (s->current_picture.f.key_frame)
 1567             pkt->flags |= AV_PKT_FLAG_KEY;
 1568     } else {
              /* nothing was coded by this call */
 1569         assert((put_bits_ptr(&s->pb) == s->pb.buf));
 1570         s->frame_bits = 0;
 1571     }
 1572     assert((s->frame_bits & 7) == 0);
 1573
 1574     pkt->size = s->frame_bits / 8;
 1575     *got_packet = !!pkt->size;
 1576     return 0;
 1577 }
1578
1579 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1580                                                 int n, int threshold)
1581 {
1582     static const char tab[64] = {
1583         3, 2, 2, 1, 1, 1, 1, 1,
1584         1, 1, 1, 1, 1, 1, 1, 1,
1585         1, 1, 1, 1, 1, 1, 1, 1,
1586         0, 0, 0, 0, 0, 0, 0, 0,
1587         0, 0, 0, 0, 0, 0, 0, 0,
1588         0, 0, 0, 0, 0, 0, 0, 0,
1589         0, 0, 0, 0, 0, 0, 0, 0,
1590         0, 0, 0, 0, 0, 0, 0, 0
1591     };
1592     int score = 0;
1593     int run = 0;
1594     int i;
1595     DCTELEM *block = s->block[n];
1596     const int last_index = s->block_last_index[n];
1597     int skip_dc;
1598
1599     if (threshold < 0) {
1600         skip_dc = 0;
1601         threshold = -threshold;
1602     } else
1603         skip_dc = 1;
1604
1605     /* Are all we could set to zero already zero? */
1606     if (last_index <= skip_dc - 1)
1607         return;
1608
1609     for (i = 0; i <= last_index; i++) {
1610         const int j = s->intra_scantable.permutated[i];
1611         const int level = FFABS(block[j]);
1612         if (level == 1) {
1613             if (skip_dc && i == 0)
1614                 continue;
1615             score += tab[run];
1616             run = 0;
1617         } else if (level > 1) {
1618             return;
1619         } else {
1620             run++;
1621         }
1622     }
1623     if (score >= threshold)
1624         return;
1625     for (i = skip_dc; i <= last_index; i++) {
1626         const int j = s->intra_scantable.permutated[i];
1627         block[j] = 0;
1628     }
1629     if (block[0])
1630         s->block_last_index[n] = 0;
1631     else
1632         s->block_last_index[n] = -1;
1633 }
1634
1635 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1636                                int last_index)
1637 {
1638     int i;
1639     const int maxlevel = s->max_qcoeff;
1640     const int minlevel = s->min_qcoeff;
1641     int overflow = 0;
1642
1643     if (s->mb_intra) {
1644         i = 1; // skip clipping of intra dc
1645     } else
1646         i = 0;
1647
1648     for (; i <= last_index; i++) {
1649         const int j = s->intra_scantable.permutated[i];
1650         int level = block[j];
1651
1652         if (level > maxlevel) {
1653             level = maxlevel;
1654             overflow++;
1655         } else if (level < minlevel) {
1656             level = minlevel;
1657             overflow++;
1658         }
1659
1660         block[j] = level;
1661     }
1662
1663     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1664         av_log(s->avctx, AV_LOG_INFO,
1665                "warning, clipping %d dct coefficients to %d..%d\n",
1666                overflow, minlevel, maxlevel);
1667 }
1668
/**
 * Compute a per-pixel weight for an 8x8 block from the local activity.
 *
 * For every pixel the (up to) 3x3 neighbourhood, cropped to the 8x8 block,
 * is scanned; the weight is 36 * sqrt(count * sum(v^2) - sum(v)^2) / count.
 *
 * @param weight output, 64 entries in raster order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two lines of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total    = 0;
            int total_sq = 0;
            int samples  = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int v = ptr[nx + ny * stride];

                    total    += v;
                    total_sq += v * v;
                    samples++;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * total_sq - total * total)) / samples;
        }
    }
}
1692
1693 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1694                                                 int motion_x, int motion_y,
1695                                                 int mb_block_height,
1696                                                 int mb_block_count)
1697 {
1698     int16_t weight[8][64];
1699     DCTELEM orig[8][64];
1700     const int mb_x = s->mb_x;
1701     const int mb_y = s->mb_y;
1702     int i;
1703     int skip_dct[8];
1704     int dct_offset = s->linesize * 8; // default for progressive frames
1705     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1706     int wrap_y, wrap_c;
1707
1708     for (i = 0; i < mb_block_count; i++)
1709         skip_dct[i] = s->skipdct;
1710
1711     if (s->adaptive_quant) {
1712         const int last_qp = s->qscale;
1713         const int mb_xy = mb_x + mb_y * s->mb_stride;
1714
1715         s->lambda = s->lambda_table[mb_xy];
1716         update_qscale(s);
1717
1718         if (!(s->flags & CODEC_FLAG_QP_RD)) {
1719             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1720             s->dquant = s->qscale - last_qp;
1721
1722             if (s->out_format == FMT_H263) {
1723                 s->dquant = av_clip(s->dquant, -2, 2);
1724
1725                 if (s->codec_id == CODEC_ID_MPEG4) {
1726                     if (!s->mb_intra) {
1727                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1728                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1729                                 s->dquant = 0;
1730                         }
1731                         if (s->mv_type == MV_TYPE_8X8)
1732                             s->dquant = 0;
1733                     }
1734                 }
1735             }
1736         }
1737         ff_set_qscale(s, last_qp + s->dquant);
1738     } else if (s->flags & CODEC_FLAG_QP_RD)
1739         ff_set_qscale(s, s->qscale + s->dquant);
1740
1741     wrap_y = s->linesize;
1742     wrap_c = s->uvlinesize;
1743     ptr_y  = s->new_picture.f.data[0] +
1744              (mb_y * 16 * wrap_y)              + mb_x * 16;
1745     ptr_cb = s->new_picture.f.data[1] +
1746              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1747     ptr_cr = s->new_picture.f.data[2] +
1748              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1749
1750     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1751         uint8_t *ebuf = s->edge_emu_buffer + 32;
1752         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1753                                 mb_y * 16, s->width, s->height);
1754         ptr_y = ebuf;
1755         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1756                                 mb_block_height, mb_x * 8, mb_y * 8,
1757                                 s->width >> 1, s->height >> 1);
1758         ptr_cb = ebuf + 18 * wrap_y;
1759         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1760                                 mb_block_height, mb_x * 8, mb_y * 8,
1761                                 s->width >> 1, s->height >> 1);
1762         ptr_cr = ebuf + 18 * wrap_y + 8;
1763     }
1764
1765     if (s->mb_intra) {
1766         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1767             int progressive_score, interlaced_score;
1768
1769             s->interlaced_dct = 0;
1770             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1771                                                     NULL, wrap_y, 8) +
1772                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1773                                                     NULL, wrap_y, 8) - 400;
1774
1775             if (progressive_score > 0) {
1776                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1777                                                        NULL, wrap_y * 2, 8) +
1778                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1779                                                        NULL, wrap_y * 2, 8);
1780                 if (progressive_score > interlaced_score) {
1781                     s->interlaced_dct = 1;
1782
1783                     dct_offset = wrap_y;
1784                     wrap_y <<= 1;
1785                     if (s->chroma_format == CHROMA_422)
1786                         wrap_c <<= 1;
1787                 }
1788             }
1789         }
1790
1791         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1792         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1793         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1794         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1795
1796         if (s->flags & CODEC_FLAG_GRAY) {
1797             skip_dct[4] = 1;
1798             skip_dct[5] = 1;
1799         } else {
1800             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1801             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1802             if (!s->chroma_y_shift) { /* 422 */
1803                 s->dsp.get_pixels(s->block[6],
1804                                   ptr_cb + (dct_offset >> 1), wrap_c);
1805                 s->dsp.get_pixels(s->block[7],
1806                                   ptr_cr + (dct_offset >> 1), wrap_c);
1807             }
1808         }
1809     } else {
1810         op_pixels_func (*op_pix)[4];
1811         qpel_mc_func (*op_qpix)[16];
1812         uint8_t *dest_y, *dest_cb, *dest_cr;
1813
1814         dest_y  = s->dest[0];
1815         dest_cb = s->dest[1];
1816         dest_cr = s->dest[2];
1817
1818         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1819             op_pix  = s->dsp.put_pixels_tab;
1820             op_qpix = s->dsp.put_qpel_pixels_tab;
1821         } else {
1822             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1823             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1824         }
1825
1826         if (s->mv_dir & MV_DIR_FORWARD) {
1827             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f.data,
1828                        op_pix, op_qpix);
1829             op_pix  = s->dsp.avg_pixels_tab;
1830             op_qpix = s->dsp.avg_qpel_pixels_tab;
1831         }
1832         if (s->mv_dir & MV_DIR_BACKWARD) {
1833             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f.data,
1834                        op_pix, op_qpix);
1835         }
1836
1837         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1838             int progressive_score, interlaced_score;
1839
1840             s->interlaced_dct = 0;
1841             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1842                                                     ptr_y,              wrap_y,
1843                                                     8) +
1844                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1845                                                     ptr_y + wrap_y * 8, wrap_y,
1846                                                     8) - 400;
1847
1848             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1849                 progressive_score -= 400;
1850
1851             if (progressive_score > 0) {
1852                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1853                                                        ptr_y,
1854                                                        wrap_y * 2, 8) +
1855                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1856                                                        ptr_y + wrap_y,
1857                                                        wrap_y * 2, 8);
1858
1859                 if (progressive_score > interlaced_score) {
1860                     s->interlaced_dct = 1;
1861
1862                     dct_offset = wrap_y;
1863                     wrap_y <<= 1;
1864                     if (s->chroma_format == CHROMA_422)
1865                         wrap_c <<= 1;
1866                 }
1867             }
1868         }
1869
1870         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1871         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1872         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1873                            dest_y + dct_offset, wrap_y);
1874         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1875                            dest_y + dct_offset + 8, wrap_y);
1876
1877         if (s->flags & CODEC_FLAG_GRAY) {
1878             skip_dct[4] = 1;
1879             skip_dct[5] = 1;
1880         } else {
1881             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1882             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1883             if (!s->chroma_y_shift) { /* 422 */
1884                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1885                                    dest_cb + (dct_offset >> 1), wrap_c);
1886                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1887                                    dest_cr + (dct_offset >> 1), wrap_c);
1888             }
1889         }
1890         /* pre quantization */
1891         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1892                 2 * s->qscale * s->qscale) {
1893             // FIXME optimize
1894             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1895                               wrap_y, 8) < 20 * s->qscale)
1896                 skip_dct[0] = 1;
1897             if (s->dsp.sad[1](NULL, ptr_y + 8,
1898                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1899                 skip_dct[1] = 1;
1900             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1901                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1902                 skip_dct[2] = 1;
1903             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1904                               dest_y + dct_offset + 8,
1905                               wrap_y, 8) < 20 * s->qscale)
1906                 skip_dct[3] = 1;
1907             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1908                               wrap_c, 8) < 20 * s->qscale)
1909                 skip_dct[4] = 1;
1910             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1911                               wrap_c, 8) < 20 * s->qscale)
1912                 skip_dct[5] = 1;
1913             if (!s->chroma_y_shift) { /* 422 */
1914                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1915                                   dest_cb + (dct_offset >> 1),
1916                                   wrap_c, 8) < 20 * s->qscale)
1917                     skip_dct[6] = 1;
1918                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1919                                   dest_cr + (dct_offset >> 1),
1920                                   wrap_c, 8) < 20 * s->qscale)
1921                     skip_dct[7] = 1;
1922             }
1923         }
1924     }
1925
1926     if (s->avctx->quantizer_noise_shaping) {
1927         if (!skip_dct[0])
1928             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1929         if (!skip_dct[1])
1930             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1931         if (!skip_dct[2])
1932             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1933         if (!skip_dct[3])
1934             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1935         if (!skip_dct[4])
1936             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1937         if (!skip_dct[5])
1938             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1939         if (!s->chroma_y_shift) { /* 422 */
1940             if (!skip_dct[6])
1941                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1942                                   wrap_c);
1943             if (!skip_dct[7])
1944                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1945                                   wrap_c);
1946         }
1947         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
1948     }
1949
1950     /* DCT & quantize */
1951     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1952     {
1953         for (i = 0; i < mb_block_count; i++) {
1954             if (!skip_dct[i]) {
1955                 int overflow;
1956                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1957                 // FIXME we could decide to change to quantizer instead of
1958                 // clipping
1959                 // JS: I don't think that would be a good idea it could lower
1960                 //     quality instead of improve it. Just INTRADC clipping
1961                 //     deserves changes in quantizer
1962                 if (overflow)
1963                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
1964             } else
1965                 s->block_last_index[i] = -1;
1966         }
1967         if (s->avctx->quantizer_noise_shaping) {
1968             for (i = 0; i < mb_block_count; i++) {
1969                 if (!skip_dct[i]) {
1970                     s->block_last_index[i] =
1971                         dct_quantize_refine(s, s->block[i], weight[i],
1972                                             orig[i], i, s->qscale);
1973                 }
1974             }
1975         }
1976
1977         if (s->luma_elim_threshold && !s->mb_intra)
1978             for (i = 0; i < 4; i++)
1979                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
1980         if (s->chroma_elim_threshold && !s->mb_intra)
1981             for (i = 4; i < mb_block_count; i++)
1982                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
1983
1984         if (s->flags & CODEC_FLAG_CBP_RD) {
1985             for (i = 0; i < mb_block_count; i++) {
1986                 if (s->block_last_index[i] == -1)
1987                     s->coded_score[i] = INT_MAX / 256;
1988             }
1989         }
1990     }
1991
1992     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
1993         s->block_last_index[4] =
1994         s->block_last_index[5] = 0;
1995         s->block[4][0] =
1996         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
1997     }
1998
1999     // non c quantize code returns incorrect block_last_index FIXME
2000     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2001         for (i = 0; i < mb_block_count; i++) {
2002             int j;
2003             if (s->block_last_index[i] > 0) {
2004                 for (j = 63; j > 0; j--) {
2005                     if (s->block[i][s->intra_scantable.permutated[j]])
2006                         break;
2007                 }
2008                 s->block_last_index[i] = j;
2009             }
2010         }
2011     }
2012
2013     /* huffman encode */
2014     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2015     case CODEC_ID_MPEG1VIDEO:
2016     case CODEC_ID_MPEG2VIDEO:
2017         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2018             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2019         break;
2020     case CODEC_ID_MPEG4:
2021         if (CONFIG_MPEG4_ENCODER)
2022             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2023         break;
2024     case CODEC_ID_MSMPEG4V2:
2025     case CODEC_ID_MSMPEG4V3:
2026     case CODEC_ID_WMV1:
2027         if (CONFIG_MSMPEG4_ENCODER)
2028             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2029         break;
2030     case CODEC_ID_WMV2:
2031         if (CONFIG_WMV2_ENCODER)
2032             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2033         break;
2034     case CODEC_ID_H261:
2035         if (CONFIG_H261_ENCODER)
2036             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2037         break;
2038     case CODEC_ID_H263:
2039     case CODEC_ID_H263P:
2040     case CODEC_ID_FLV1:
2041     case CODEC_ID_RV10:
2042     case CODEC_ID_RV20:
2043         if (CONFIG_H263_ENCODER)
2044             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2045         break;
2046     case CODEC_ID_MJPEG:
2047         if (CONFIG_MJPEG_ENCODER)
2048             ff_mjpeg_encode_mb(s, s->block);
2049         break;
2050     default:
2051         assert(0);
2052     }
2053 }
2054
2055 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2056 {
2057     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2058     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2059 }
2060
2061 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2062     int i;
2063
2064     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2065
2066     /* mpeg1 */
2067     d->mb_skip_run= s->mb_skip_run;
2068     for(i=0; i<3; i++)
2069         d->last_dc[i] = s->last_dc[i];
2070
2071     /* statistics */
2072     d->mv_bits= s->mv_bits;
2073     d->i_tex_bits= s->i_tex_bits;
2074     d->p_tex_bits= s->p_tex_bits;
2075     d->i_count= s->i_count;
2076     d->f_count= s->f_count;
2077     d->b_count= s->b_count;
2078     d->skip_count= s->skip_count;
2079     d->misc_bits= s->misc_bits;
2080     d->last_bits= 0;
2081
2082     d->mb_skipped= 0;
2083     d->qscale= s->qscale;
2084     d->dquant= s->dquant;
2085
2086     d->esc3_level_length= s->esc3_level_length;
2087 }
2088
2089 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2090     int i;
2091
2092     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2093     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2094
2095     /* mpeg1 */
2096     d->mb_skip_run= s->mb_skip_run;
2097     for(i=0; i<3; i++)
2098         d->last_dc[i] = s->last_dc[i];
2099
2100     /* statistics */
2101     d->mv_bits= s->mv_bits;
2102     d->i_tex_bits= s->i_tex_bits;
2103     d->p_tex_bits= s->p_tex_bits;
2104     d->i_count= s->i_count;
2105     d->f_count= s->f_count;
2106     d->b_count= s->b_count;
2107     d->skip_count= s->skip_count;
2108     d->misc_bits= s->misc_bits;
2109
2110     d->mb_intra= s->mb_intra;
2111     d->mb_skipped= s->mb_skipped;
2112     d->mv_type= s->mv_type;
2113     d->mv_dir= s->mv_dir;
2114     d->pb= s->pb;
2115     if(s->data_partitioning){
2116         d->pb2= s->pb2;
2117         d->tex_pb= s->tex_pb;
2118     }
2119     d->block= s->block;
2120     for(i=0; i<8; i++)
2121         d->block_last_index[i]= s->block_last_index[i];
2122     d->interlaced_dct= s->interlaced_dct;
2123     d->qscale= s->qscale;
2124
2125     d->esc3_level_length= s->esc3_level_length;
2126 }
2127
2128 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2129                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2130                            int *dmin, int *next_block, int motion_x, int motion_y)
2131 {
2132     int score;
2133     uint8_t *dest_backup[3];
2134
2135     copy_context_before_encode(s, backup, type);
2136
2137     s->block= s->blocks[*next_block];
2138     s->pb= pb[*next_block];
2139     if(s->data_partitioning){
2140         s->pb2   = pb2   [*next_block];
2141         s->tex_pb= tex_pb[*next_block];
2142     }
2143
2144     if(*next_block){
2145         memcpy(dest_backup, s->dest, sizeof(s->dest));
2146         s->dest[0] = s->rd_scratchpad;
2147         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2148         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2149         assert(s->linesize >= 32); //FIXME
2150     }
2151
2152     encode_mb(s, motion_x, motion_y);
2153
2154     score= put_bits_count(&s->pb);
2155     if(s->data_partitioning){
2156         score+= put_bits_count(&s->pb2);
2157         score+= put_bits_count(&s->tex_pb);
2158     }
2159
2160     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2161         ff_MPV_decode_mb(s, s->block);
2162
2163         score *= s->lambda2;
2164         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2165     }
2166
2167     if(*next_block){
2168         memcpy(s->dest, dest_backup, sizeof(s->dest));
2169     }
2170
2171     if(score<*dmin){
2172         *dmin= score;
2173         *next_block^=1;
2174
2175         copy_context_after_encode(best, s, type);
2176     }
2177 }
2178
2179 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2180     uint32_t *sq = ff_squareTbl + 256;
2181     int acc=0;
2182     int x,y;
2183
2184     if(w==16 && h==16)
2185         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2186     else if(w==8 && h==8)
2187         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2188
2189     for(y=0; y<h; y++){
2190         for(x=0; x<w; x++){
2191             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2192         }
2193     }
2194
2195     assert(acc>=0);
2196
2197     return acc;
2198 }
2199
2200 static int sse_mb(MpegEncContext *s){
2201     int w= 16;
2202     int h= 16;
2203
2204     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2205     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2206
2207     if(w==16 && h==16)
2208       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2209         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2210                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2211                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2212       }else{
2213         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2214                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2215                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2216       }
2217     else
2218         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2219                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2220                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2221 }
2222
2223 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2224     MpegEncContext *s= *(void**)arg;
2225
2226
2227     s->me.pre_pass=1;
2228     s->me.dia_size= s->avctx->pre_dia_size;
2229     s->first_slice_line=1;
2230     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2231         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2232             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2233         }
2234         s->first_slice_line=0;
2235     }
2236
2237     s->me.pre_pass=0;
2238
2239     return 0;
2240 }
2241
2242 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2243     MpegEncContext *s= *(void**)arg;
2244
2245     ff_check_alignment();
2246
2247     s->me.dia_size= s->avctx->dia_size;
2248     s->first_slice_line=1;
2249     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2250         s->mb_x=0; //for block init below
2251         ff_init_block_index(s);
2252         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2253             s->block_index[0]+=2;
2254             s->block_index[1]+=2;
2255             s->block_index[2]+=2;
2256             s->block_index[3]+=2;
2257
2258             /* compute motion vector & mb_type and store in context */
2259             if(s->pict_type==AV_PICTURE_TYPE_B)
2260                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2261             else
2262                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2263         }
2264         s->first_slice_line=0;
2265     }
2266     return 0;
2267 }
2268
2269 static int mb_var_thread(AVCodecContext *c, void *arg){
2270     MpegEncContext *s= *(void**)arg;
2271     int mb_x, mb_y;
2272
2273     ff_check_alignment();
2274
2275     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2276         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2277             int xx = mb_x * 16;
2278             int yy = mb_y * 16;
2279             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2280             int varc;
2281             int sum = s->dsp.pix_sum(pix, s->linesize);
2282
2283             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2284
2285             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2286             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2287             s->me.mb_var_sum_temp    += varc;
2288         }
2289     }
2290     return 0;
2291 }
2292
2293 static void write_slice_end(MpegEncContext *s){
2294     if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4){
2295         if(s->partitioned_frame){
2296             ff_mpeg4_merge_partitions(s);
2297         }
2298
2299         ff_mpeg4_stuffing(&s->pb);
2300     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2301         ff_mjpeg_encode_stuffing(&s->pb);
2302     }
2303
2304     avpriv_align_put_bits(&s->pb);
2305     flush_put_bits(&s->pb);
2306
2307     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2308         s->misc_bits+= get_bits_diff(s);
2309 }
2310
2311 static int encode_thread(AVCodecContext *c, void *arg){
2312     MpegEncContext *s= *(void**)arg;
2313     int mb_x, mb_y, pdif = 0;
2314     int chr_h= 16>>s->chroma_y_shift;
2315     int i, j;
2316     MpegEncContext best_s, backup_s;
2317     uint8_t bit_buf[2][MAX_MB_BYTES];
2318     uint8_t bit_buf2[2][MAX_MB_BYTES];
2319     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2320     PutBitContext pb[2], pb2[2], tex_pb[2];
2321 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2322
2323     ff_check_alignment();
2324
2325     for(i=0; i<2; i++){
2326         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2327         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2328         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2329     }
2330
2331     s->last_bits= put_bits_count(&s->pb);
2332     s->mv_bits=0;
2333     s->misc_bits=0;
2334     s->i_tex_bits=0;
2335     s->p_tex_bits=0;
2336     s->i_count=0;
2337     s->f_count=0;
2338     s->b_count=0;
2339     s->skip_count=0;
2340
2341     for(i=0; i<3; i++){
2342         /* init last dc values */
2343         /* note: quant matrix value (8) is implied here */
2344         s->last_dc[i] = 128 << s->intra_dc_precision;
2345
2346         s->current_picture.f.error[i] = 0;
2347     }
2348     s->mb_skip_run = 0;
2349     memset(s->last_mv, 0, sizeof(s->last_mv));
2350
2351     s->last_mv_dir = 0;
2352
2353     switch(s->codec_id){
2354     case CODEC_ID_H263:
2355     case CODEC_ID_H263P:
2356     case CODEC_ID_FLV1:
2357         if (CONFIG_H263_ENCODER)
2358             s->gob_index = ff_h263_get_gob_height(s);
2359         break;
2360     case CODEC_ID_MPEG4:
2361         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2362             ff_mpeg4_init_partitions(s);
2363         break;
2364     }
2365
2366     s->resync_mb_x=0;
2367     s->resync_mb_y=0;
2368     s->first_slice_line = 1;
2369     s->ptr_lastgob = s->pb.buf;
2370     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2371 //    printf("row %d at %X\n", s->mb_y, (int)s);
2372         s->mb_x=0;
2373         s->mb_y= mb_y;
2374
2375         ff_set_qscale(s, s->qscale);
2376         ff_init_block_index(s);
2377
2378         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2379             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2380             int mb_type= s->mb_type[xy];
2381 //            int d;
2382             int dmin= INT_MAX;
2383             int dir;
2384
2385             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2386                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2387                 return -1;
2388             }
2389             if(s->data_partitioning){
2390                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2391                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2392                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2393                     return -1;
2394                 }
2395             }
2396
2397             s->mb_x = mb_x;
2398             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2399             ff_update_block_index(s);
2400
2401             if(CONFIG_H261_ENCODER && s->codec_id == CODEC_ID_H261){
2402                 ff_h261_reorder_mb_index(s);
2403                 xy= s->mb_y*s->mb_stride + s->mb_x;
2404                 mb_type= s->mb_type[xy];
2405             }
2406
2407             /* write gob / video packet header  */
2408             if(s->rtp_mode){
2409                 int current_packet_size, is_gob_start;
2410
2411                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2412
2413                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2414
2415                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2416
2417                 switch(s->codec_id){
2418                 case CODEC_ID_H263:
2419                 case CODEC_ID_H263P:
2420                     if(!s->h263_slice_structured)
2421                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2422                     break;
2423                 case CODEC_ID_MPEG2VIDEO:
2424                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2425                 case CODEC_ID_MPEG1VIDEO:
2426                     if(s->mb_skip_run) is_gob_start=0;
2427                     break;
2428                 }
2429
2430                 if(is_gob_start){
2431                     if(s->start_mb_y != mb_y || mb_x!=0){
2432                         write_slice_end(s);
2433
2434                         if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
2435                             ff_mpeg4_init_partitions(s);
2436                         }
2437                     }
2438
2439                     assert((put_bits_count(&s->pb)&7) == 0);
2440                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2441
2442                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2443                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2444                         int d= 100 / s->avctx->error_rate;
2445                         if(r % d == 0){
2446                             current_packet_size=0;
2447                             s->pb.buf_ptr= s->ptr_lastgob;
2448                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2449                         }
2450                     }
2451
2452                     if (s->avctx->rtp_callback){
2453                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2454                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2455                     }
2456
2457                     switch(s->codec_id){
2458                     case CODEC_ID_MPEG4:
2459                         if (CONFIG_MPEG4_ENCODER) {
2460                             ff_mpeg4_encode_video_packet_header(s);
2461                             ff_mpeg4_clean_buffers(s);
2462                         }
2463                     break;
2464                     case CODEC_ID_MPEG1VIDEO:
2465                     case CODEC_ID_MPEG2VIDEO:
2466                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2467                             ff_mpeg1_encode_slice_header(s);
2468                             ff_mpeg1_clean_buffers(s);
2469                         }
2470                     break;
2471                     case CODEC_ID_H263:
2472                     case CODEC_ID_H263P:
2473                         if (CONFIG_H263_ENCODER)
2474                             ff_h263_encode_gob_header(s, mb_y);
2475                     break;
2476                     }
2477
2478                     if(s->flags&CODEC_FLAG_PASS1){
2479                         int bits= put_bits_count(&s->pb);
2480                         s->misc_bits+= bits - s->last_bits;
2481                         s->last_bits= bits;
2482                     }
2483
2484                     s->ptr_lastgob += current_packet_size;
2485                     s->first_slice_line=1;
2486                     s->resync_mb_x=mb_x;
2487                     s->resync_mb_y=mb_y;
2488                 }
2489             }
2490
2491             if(  (s->resync_mb_x   == s->mb_x)
2492                && s->resync_mb_y+1 == s->mb_y){
2493                 s->first_slice_line=0;
2494             }
2495
2496             s->mb_skipped=0;
2497             s->dquant=0; //only for QP_RD
2498
2499             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
2500                 int next_block=0;
2501                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2502
2503                 copy_context_before_encode(&backup_s, s, -1);
2504                 backup_s.pb= s->pb;
2505                 best_s.data_partitioning= s->data_partitioning;
2506                 best_s.partitioned_frame= s->partitioned_frame;
2507                 if(s->data_partitioning){
2508                     backup_s.pb2= s->pb2;
2509                     backup_s.tex_pb= s->tex_pb;
2510                 }
2511
2512                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2513                     s->mv_dir = MV_DIR_FORWARD;
2514                     s->mv_type = MV_TYPE_16X16;
2515                     s->mb_intra= 0;
2516                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2517                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2518                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2519                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2520                 }
2521                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2522                     s->mv_dir = MV_DIR_FORWARD;
2523                     s->mv_type = MV_TYPE_FIELD;
2524                     s->mb_intra= 0;
2525                     for(i=0; i<2; i++){
2526                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2527                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2528                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2529                     }
2530                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2531                                  &dmin, &next_block, 0, 0);
2532                 }
2533                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2534                     s->mv_dir = MV_DIR_FORWARD;
2535                     s->mv_type = MV_TYPE_16X16;
2536                     s->mb_intra= 0;
2537                     s->mv[0][0][0] = 0;
2538                     s->mv[0][0][1] = 0;
2539                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2540                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2541                 }
2542                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2543                     s->mv_dir = MV_DIR_FORWARD;
2544                     s->mv_type = MV_TYPE_8X8;
2545                     s->mb_intra= 0;
2546                     for(i=0; i<4; i++){
2547                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2548                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2549                     }
2550                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2551                                  &dmin, &next_block, 0, 0);
2552                 }
2553                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2554                     s->mv_dir = MV_DIR_FORWARD;
2555                     s->mv_type = MV_TYPE_16X16;
2556                     s->mb_intra= 0;
2557                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2558                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2559                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2560                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2561                 }
2562                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2563                     s->mv_dir = MV_DIR_BACKWARD;
2564                     s->mv_type = MV_TYPE_16X16;
2565                     s->mb_intra= 0;
2566                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2567                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2568                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2569                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2570                 }
2571                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2572                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2573                     s->mv_type = MV_TYPE_16X16;
2574                     s->mb_intra= 0;
2575                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2576                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2577                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2578                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2579                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2580                                  &dmin, &next_block, 0, 0);
2581                 }
2582                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2583                     s->mv_dir = MV_DIR_FORWARD;
2584                     s->mv_type = MV_TYPE_FIELD;
2585                     s->mb_intra= 0;
2586                     for(i=0; i<2; i++){
2587                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2588                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2589                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2590                     }
2591                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2592                                  &dmin, &next_block, 0, 0);
2593                 }
2594                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2595                     s->mv_dir = MV_DIR_BACKWARD;
2596                     s->mv_type = MV_TYPE_FIELD;
2597                     s->mb_intra= 0;
2598                     for(i=0; i<2; i++){
2599                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2600                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2601                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2602                     }
2603                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2604                                  &dmin, &next_block, 0, 0);
2605                 }
2606                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2607                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2608                     s->mv_type = MV_TYPE_FIELD;
2609                     s->mb_intra= 0;
2610                     for(dir=0; dir<2; dir++){
2611                         for(i=0; i<2; i++){
2612                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2613                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2614                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2615                         }
2616                     }
2617                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2618                                  &dmin, &next_block, 0, 0);
2619                 }
2620                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2621                     s->mv_dir = 0;
2622                     s->mv_type = MV_TYPE_16X16;
2623                     s->mb_intra= 1;
2624                     s->mv[0][0][0] = 0;
2625                     s->mv[0][0][1] = 0;
2626                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2627                                  &dmin, &next_block, 0, 0);
2628                     if(s->h263_pred || s->h263_aic){
2629                         if(best_s.mb_intra)
2630                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2631                         else
2632                             ff_clean_intra_table_entries(s); //old mode?
2633                     }
2634                 }
2635
2636                 if((s->flags & CODEC_FLAG_QP_RD) && dmin < INT_MAX){
2637                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2638                         const int last_qp= backup_s.qscale;
2639                         int qpi, qp, dc[6];
2640                         DCTELEM ac[6][16];
2641                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2642                         static const int dquant_tab[4]={-1,1,-2,2};
2643
2644                         assert(backup_s.dquant == 0);
2645
2646                         //FIXME intra
2647                         s->mv_dir= best_s.mv_dir;
2648                         s->mv_type = MV_TYPE_16X16;
2649                         s->mb_intra= best_s.mb_intra;
2650                         s->mv[0][0][0] = best_s.mv[0][0][0];
2651                         s->mv[0][0][1] = best_s.mv[0][0][1];
2652                         s->mv[1][0][0] = best_s.mv[1][0][0];
2653                         s->mv[1][0][1] = best_s.mv[1][0][1];
2654
2655                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2656                         for(; qpi<4; qpi++){
2657                             int dquant= dquant_tab[qpi];
2658                             qp= last_qp + dquant;
2659                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2660                                 continue;
2661                             backup_s.dquant= dquant;
2662                             if(s->mb_intra && s->dc_val[0]){
2663                                 for(i=0; i<6; i++){
2664                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2665                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2666                                 }
2667                             }
2668
2669                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2670                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2671                             if(best_s.qscale != qp){
2672                                 if(s->mb_intra && s->dc_val[0]){
2673                                     for(i=0; i<6; i++){
2674                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2675                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2676                                     }
2677                                 }
2678                             }
2679                         }
2680                     }
2681                 }
2682                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2683                     int mx= s->b_direct_mv_table[xy][0];
2684                     int my= s->b_direct_mv_table[xy][1];
2685
2686                     backup_s.dquant = 0;
2687                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2688                     s->mb_intra= 0;
2689                     ff_mpeg4_set_direct_mv(s, mx, my);
2690                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2691                                  &dmin, &next_block, mx, my);
2692                 }
2693                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2694                     backup_s.dquant = 0;
2695                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2696                     s->mb_intra= 0;
2697                     ff_mpeg4_set_direct_mv(s, 0, 0);
2698                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2699                                  &dmin, &next_block, 0, 0);
2700                 }
2701                 if(!best_s.mb_intra && s->flags2&CODEC_FLAG2_SKIP_RD){
2702                     int coded=0;
2703                     for(i=0; i<6; i++)
2704                         coded |= s->block_last_index[i];
2705                     if(coded){
2706                         int mx,my;
2707                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2708                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2709                             mx=my=0; //FIXME find the one we actually used
2710                             ff_mpeg4_set_direct_mv(s, mx, my);
2711                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2712                             mx= s->mv[1][0][0];
2713                             my= s->mv[1][0][1];
2714                         }else{
2715                             mx= s->mv[0][0][0];
2716                             my= s->mv[0][0][1];
2717                         }
2718
2719                         s->mv_dir= best_s.mv_dir;
2720                         s->mv_type = best_s.mv_type;
2721                         s->mb_intra= 0;
2722 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2723                         s->mv[0][0][1] = best_s.mv[0][0][1];
2724                         s->mv[1][0][0] = best_s.mv[1][0][0];
2725                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2726                         backup_s.dquant= 0;
2727                         s->skipdct=1;
2728                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2729                                         &dmin, &next_block, mx, my);
2730                         s->skipdct=0;
2731                     }
2732                 }
2733
2734                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2735
2736                 copy_context_after_encode(s, &best_s, -1);
2737
2738                 pb_bits_count= put_bits_count(&s->pb);
2739                 flush_put_bits(&s->pb);
2740                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2741                 s->pb= backup_s.pb;
2742
2743                 if(s->data_partitioning){
2744                     pb2_bits_count= put_bits_count(&s->pb2);
2745                     flush_put_bits(&s->pb2);
2746                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2747                     s->pb2= backup_s.pb2;
2748
2749                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2750                     flush_put_bits(&s->tex_pb);
2751                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2752                     s->tex_pb= backup_s.tex_pb;
2753                 }
2754                 s->last_bits= put_bits_count(&s->pb);
2755
2756                 if (CONFIG_H263_ENCODER &&
2757                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2758                     ff_h263_update_motion_val(s);
2759
2760                 if(next_block==0){ //FIXME 16 vs linesize16
2761                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2762                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2763                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2764                 }
2765
2766                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2767                     ff_MPV_decode_mb(s, s->block);
2768             } else {
2769                 int motion_x = 0, motion_y = 0;
2770                 s->mv_type=MV_TYPE_16X16;
2771                 // only one MB-Type possible
2772
2773                 switch(mb_type){
2774                 case CANDIDATE_MB_TYPE_INTRA:
2775                     s->mv_dir = 0;
2776                     s->mb_intra= 1;
2777                     motion_x= s->mv[0][0][0] = 0;
2778                     motion_y= s->mv[0][0][1] = 0;
2779                     break;
2780                 case CANDIDATE_MB_TYPE_INTER:
2781                     s->mv_dir = MV_DIR_FORWARD;
2782                     s->mb_intra= 0;
2783                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2784                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2785                     break;
2786                 case CANDIDATE_MB_TYPE_INTER_I:
2787                     s->mv_dir = MV_DIR_FORWARD;
2788                     s->mv_type = MV_TYPE_FIELD;
2789                     s->mb_intra= 0;
2790                     for(i=0; i<2; i++){
2791                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2792                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2793                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2794                     }
2795                     break;
2796                 case CANDIDATE_MB_TYPE_INTER4V:
2797                     s->mv_dir = MV_DIR_FORWARD;
2798                     s->mv_type = MV_TYPE_8X8;
2799                     s->mb_intra= 0;
2800                     for(i=0; i<4; i++){
2801                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2802                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2803                     }
2804                     break;
2805                 case CANDIDATE_MB_TYPE_DIRECT:
2806                     if (CONFIG_MPEG4_ENCODER) {
2807                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2808                         s->mb_intra= 0;
2809                         motion_x=s->b_direct_mv_table[xy][0];
2810                         motion_y=s->b_direct_mv_table[xy][1];
2811                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2812                     }
2813                     break;
2814                 case CANDIDATE_MB_TYPE_DIRECT0:
2815                     if (CONFIG_MPEG4_ENCODER) {
2816                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2817                         s->mb_intra= 0;
2818                         ff_mpeg4_set_direct_mv(s, 0, 0);
2819                     }
2820                     break;
2821                 case CANDIDATE_MB_TYPE_BIDIR:
2822                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2823                     s->mb_intra= 0;
2824                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2825                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2826                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2827                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2828                     break;
2829                 case CANDIDATE_MB_TYPE_BACKWARD:
2830                     s->mv_dir = MV_DIR_BACKWARD;
2831                     s->mb_intra= 0;
2832                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2833                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2834                     break;
2835                 case CANDIDATE_MB_TYPE_FORWARD:
2836                     s->mv_dir = MV_DIR_FORWARD;
2837                     s->mb_intra= 0;
2838                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2839                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2840 //                    printf(" %d %d ", motion_x, motion_y);
2841                     break;
2842                 case CANDIDATE_MB_TYPE_FORWARD_I:
2843                     s->mv_dir = MV_DIR_FORWARD;
2844                     s->mv_type = MV_TYPE_FIELD;
2845                     s->mb_intra= 0;
2846                     for(i=0; i<2; i++){
2847                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2848                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2849                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2850                     }
2851                     break;
2852                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2853                     s->mv_dir = MV_DIR_BACKWARD;
2854                     s->mv_type = MV_TYPE_FIELD;
2855                     s->mb_intra= 0;
2856                     for(i=0; i<2; i++){
2857                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2858                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2859                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2860                     }
2861                     break;
2862                 case CANDIDATE_MB_TYPE_BIDIR_I:
2863                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2864                     s->mv_type = MV_TYPE_FIELD;
2865                     s->mb_intra= 0;
2866                     for(dir=0; dir<2; dir++){
2867                         for(i=0; i<2; i++){
2868                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2869                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2870                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2871                         }
2872                     }
2873                     break;
2874                 default:
2875                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2876                 }
2877
2878                 encode_mb(s, motion_x, motion_y);
2879
2880                 // RAL: Update last macroblock type
2881                 s->last_mv_dir = s->mv_dir;
2882
2883                 if (CONFIG_H263_ENCODER &&
2884                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2885                     ff_h263_update_motion_val(s);
2886
2887                 ff_MPV_decode_mb(s, s->block);
2888             }
2889
2890             /* clean the MV table in IPS frames for direct mode in B frames */
2891             if(s->mb_intra /* && I,P,S_TYPE */){
2892                 s->p_mv_table[xy][0]=0;
2893                 s->p_mv_table[xy][1]=0;
2894             }
2895
2896             if(s->flags&CODEC_FLAG_PSNR){
2897                 int w= 16;
2898                 int h= 16;
2899
2900                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2901                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2902
2903                 s->current_picture.f.error[0] += sse(
2904                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2905                     s->dest[0], w, h, s->linesize);
2906                 s->current_picture.f.error[1] += sse(
2907                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2908                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2909                 s->current_picture.f.error[2] += sse(
2910                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2911                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2912             }
2913             if(s->loop_filter){
2914                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2915                     ff_h263_loop_filter(s);
2916             }
2917 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
2918         }
2919     }
2920
2921     //not beautiful here but we must write it before flushing so it has to be here
2922     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
2923         ff_msmpeg4_encode_ext_header(s);
2924
2925     write_slice_end(s);
2926
2927     /* Send the last GOB if RTP */
2928     if (s->avctx->rtp_callback) {
2929         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
2930         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
2931         /* Call the RTP callback to send the last GOB */
2932         emms_c();
2933         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
2934     }
2935
2936     return 0;
2937 }
2938
2939 #define MERGE(field) dst->field += src->field; src->field=0
2940 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
2941     MERGE(me.scene_change_score);
2942     MERGE(me.mc_mb_var_sum_temp);
2943     MERGE(me.mb_var_sum_temp);
2944 }
2945
2946 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
2947     int i;
2948
2949     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
2950     MERGE(dct_count[1]);
2951     MERGE(mv_bits);
2952     MERGE(i_tex_bits);
2953     MERGE(p_tex_bits);
2954     MERGE(i_count);
2955     MERGE(f_count);
2956     MERGE(b_count);
2957     MERGE(skip_count);
2958     MERGE(misc_bits);
2959     MERGE(error_count);
2960     MERGE(padding_bug_score);
2961     MERGE(current_picture.f.error[0]);
2962     MERGE(current_picture.f.error[1]);
2963     MERGE(current_picture.f.error[2]);
2964
2965     if(dst->avctx->noise_reduction){
2966         for(i=0; i<64; i++){
2967             MERGE(dct_error_sum[0][i]);
2968             MERGE(dct_error_sum[1][i]);
2969         }
2970     }
2971
2972     assert(put_bits_count(&src->pb) % 8 ==0);
2973     assert(put_bits_count(&dst->pb) % 8 ==0);
2974     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
2975     flush_put_bits(&dst->pb);
2976 }
2977
2978 static int estimate_qp(MpegEncContext *s, int dry_run){
2979     if (s->next_lambda){
2980         s->current_picture_ptr->f.quality =
2981         s->current_picture.f.quality = s->next_lambda;
2982         if(!dry_run) s->next_lambda= 0;
2983     } else if (!s->fixed_qscale) {
2984         s->current_picture_ptr->f.quality =
2985         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
2986         if (s->current_picture.f.quality < 0)
2987             return -1;
2988     }
2989
2990     if(s->adaptive_quant){
2991         switch(s->codec_id){
2992         case CODEC_ID_MPEG4:
2993             if (CONFIG_MPEG4_ENCODER)
2994                 ff_clean_mpeg4_qscales(s);
2995             break;
2996         case CODEC_ID_H263:
2997         case CODEC_ID_H263P:
2998         case CODEC_ID_FLV1:
2999             if (CONFIG_H263_ENCODER)
3000                 ff_clean_h263_qscales(s);
3001             break;
3002         default:
3003             ff_init_qscale_tab(s);
3004         }
3005
3006         s->lambda= s->lambda_table[0];
3007         //FIXME broken
3008     }else
3009         s->lambda = s->current_picture.f.quality;
3010 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3011     update_qscale(s);
3012     return 0;
3013 }
3014
3015 /* must be called before writing the header */
3016 static void set_frame_distances(MpegEncContext * s){
3017     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3018     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3019
3020     if(s->pict_type==AV_PICTURE_TYPE_B){
3021         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3022         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3023     }else{
3024         s->pp_time= s->time - s->last_non_b_time;
3025         s->last_non_b_time= s->time;
3026         assert(s->picture_number==0 || s->pp_time > 0);
3027     }
3028 }
3029
3030 static int encode_picture(MpegEncContext *s, int picture_number)
3031 {
3032     int i;
3033     int bits;
3034     int context_count = s->slice_context_count;
3035
3036     s->picture_number = picture_number;
3037
3038     /* Reset the average MB variance */
3039     s->me.mb_var_sum_temp    =
3040     s->me.mc_mb_var_sum_temp = 0;
3041
3042     /* we need to initialize some time vars before we can encode b-frames */
3043     // RAL: Condition added for MPEG1VIDEO
3044     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3045         set_frame_distances(s);
3046     if(CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4)
3047         ff_set_mpeg4_time(s);
3048
3049     s->me.scene_change_score=0;
3050
3051 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3052
3053     if(s->pict_type==AV_PICTURE_TYPE_I){
3054         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3055         else                        s->no_rounding=0;
3056     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3057         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
3058             s->no_rounding ^= 1;
3059     }
3060
3061     if(s->flags & CODEC_FLAG_PASS2){
3062         if (estimate_qp(s,1) < 0)
3063             return -1;
3064         ff_get_2pass_fcode(s);
3065     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3066         if(s->pict_type==AV_PICTURE_TYPE_B)
3067             s->lambda= s->last_lambda_for[s->pict_type];
3068         else
3069             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3070         update_qscale(s);
3071     }
3072
3073     s->mb_intra=0; //for the rate distortion & bit compare functions
3074     for(i=1; i<context_count; i++){
3075         ff_update_duplicate_context(s->thread_context[i], s);
3076     }
3077
3078     if(ff_init_me(s)<0)
3079         return -1;
3080
3081     /* Estimate motion for every MB */
3082     if(s->pict_type != AV_PICTURE_TYPE_I){
3083         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3084         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3085         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3086             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3087                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3088             }
3089         }
3090
3091         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3092     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3093         /* I-Frame */
3094         for(i=0; i<s->mb_stride*s->mb_height; i++)
3095             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3096
3097         if(!s->fixed_qscale){
3098             /* finding spatial complexity for I-frame rate control */
3099             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3100         }
3101     }
3102     for(i=1; i<context_count; i++){
3103         merge_context_after_me(s, s->thread_context[i]);
3104     }
3105     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3106     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3107     emms_c();
3108
3109     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3110         s->pict_type= AV_PICTURE_TYPE_I;
3111         for(i=0; i<s->mb_stride*s->mb_height; i++)
3112             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3113 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3114     }
3115
3116     if(!s->umvplus){
3117         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3118             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3119
3120             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3121                 int a,b;
3122                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3123                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3124                 s->f_code= FFMAX3(s->f_code, a, b);
3125             }
3126
3127             ff_fix_long_p_mvs(s);
3128             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3129             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3130                 int j;
3131                 for(i=0; i<2; i++){
3132                     for(j=0; j<2; j++)
3133                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3134                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3135                 }
3136             }
3137         }
3138
3139         if(s->pict_type==AV_PICTURE_TYPE_B){
3140             int a, b;
3141
3142             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3143             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3144             s->f_code = FFMAX(a, b);
3145
3146             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3147             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3148             s->b_code = FFMAX(a, b);
3149
3150             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3151             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3152             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3153             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3154             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3155                 int dir, j;
3156                 for(dir=0; dir<2; dir++){
3157                     for(i=0; i<2; i++){
3158                         for(j=0; j<2; j++){
3159                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3160                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3161                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3162                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3163                         }
3164                     }
3165                 }
3166             }
3167         }
3168     }
3169
3170     if (estimate_qp(s, 0) < 0)
3171         return -1;
3172
3173     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3174         s->qscale= 3; //reduce clipping problems
3175
3176     if (s->out_format == FMT_MJPEG) {
3177         /* for mjpeg, we do include qscale in the matrix */
3178         for(i=1;i<64;i++){
3179             int j= s->dsp.idct_permutation[i];
3180
3181             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3182         }
3183         s->y_dc_scale_table=
3184         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3185         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3186         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3187                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3188         s->qscale= 8;
3189     }
3190
3191     //FIXME var duplication
3192     s->current_picture_ptr->f.key_frame =
3193     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3194     s->current_picture_ptr->f.pict_type =
3195     s->current_picture.f.pict_type = s->pict_type;
3196
3197     if (s->current_picture.f.key_frame)
3198         s->picture_in_gop_number=0;
3199
3200     s->last_bits= put_bits_count(&s->pb);
3201     switch(s->out_format) {
3202     case FMT_MJPEG:
3203         if (CONFIG_MJPEG_ENCODER)
3204             ff_mjpeg_encode_picture_header(s);
3205         break;
3206     case FMT_H261:
3207         if (CONFIG_H261_ENCODER)
3208             ff_h261_encode_picture_header(s, picture_number);
3209         break;
3210     case FMT_H263:
3211         if (CONFIG_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
3212             ff_wmv2_encode_picture_header(s, picture_number);
3213         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3214             ff_msmpeg4_encode_picture_header(s, picture_number);
3215         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3216             ff_mpeg4_encode_picture_header(s, picture_number);
3217         else if (CONFIG_RV10_ENCODER && s->codec_id == CODEC_ID_RV10)
3218             ff_rv10_encode_picture_header(s, picture_number);
3219         else if (CONFIG_RV20_ENCODER && s->codec_id == CODEC_ID_RV20)
3220             ff_rv20_encode_picture_header(s, picture_number);
3221         else if (CONFIG_FLV_ENCODER && s->codec_id == CODEC_ID_FLV1)
3222             ff_flv_encode_picture_header(s, picture_number);
3223         else if (CONFIG_H263_ENCODER)
3224             ff_h263_encode_picture_header(s, picture_number);
3225         break;
3226     case FMT_MPEG1:
3227         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3228             ff_mpeg1_encode_picture_header(s, picture_number);
3229         break;
3230     case FMT_H264:
3231         break;
3232     default:
3233         assert(0);
3234     }
3235     bits= put_bits_count(&s->pb);
3236     s->header_bits= bits - s->last_bits;
3237
3238     for(i=1; i<context_count; i++){
3239         update_duplicate_context_after_me(s->thread_context[i], s);
3240     }
3241     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3242     for(i=1; i<context_count; i++){
3243         merge_context_after_encode(s, s->thread_context[i]);
3244     }
3245     emms_c();
3246     return 0;
3247 }
3248
3249 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3250     const int intra= s->mb_intra;
3251     int i;
3252
3253     s->dct_count[intra]++;
3254
3255     for(i=0; i<64; i++){
3256         int level= block[i];
3257
3258         if(level){
3259             if(level>0){
3260                 s->dct_error_sum[intra][i] += level;
3261                 level -= s->dct_offset[intra][i];
3262                 if(level<0) level=0;
3263             }else{
3264                 s->dct_error_sum[intra][i] -= level;
3265                 level += s->dct_offset[intra][i];
3266                 if(level>0) level=0;
3267             }
3268             block[i]= level;
3269         }
3270     }
3271 }
3272
/**
 * Trellis quantizer: forward-DCT `block` in place, quantize it and choose,
 * for every coefficient, the quantized level that minimises
 * distortion + lambda * (VLC length), with lambda derived from s->lambda2.
 *
 * @param s        encoder context (quant matrices, VLC length tables, dsp)
 * @param block    64 samples on input; quantized coefficients on output.
 *                 Results are written through s->intra_scantable.permutated.
 * @param n        block index; for intra blocks n < 4 selects s->y_dc_scale,
 *                 otherwise s->c_dc_scale. Also indexes s->coded_score[].
 * @param qscale   quantizer scale; selects the row of the quant matrices
 * @param overflow set to non-zero when a candidate level may exceed
 *                 s->max_qcoeff
 * @return scan position of the last coefficient kept; a value below the first
 *         searched position (i.e. 0 for intra, -1 for inter blocks) means the
 *         search kept no coefficient
 */
3273 static int dct_quantize_trellis_c(MpegEncContext *s,
3274                                   DCTELEM *block, int n,
3275                                   int qscale, int *overflow){
3276     const int *qmat;
3277     const uint8_t *scantable= s->intra_scantable.scantable;
3278     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3279     int max=0;
3280     unsigned int threshold1, threshold2;
3281     int bias=0;
         /* Search state. Entry [i+1] of run_tab/level_tab/score_tab describes
          * the best way found to code a coefficient at scan position i. */
3282     int run_tab[65];
3283     int level_tab[65];
3284     int score_tab[65];
         /* positions (in the [i+1] convention above) still considered as the
          * end of the previously coded coefficient */
3285     int survivor[65];
3286     int survivor_count;
         /* best candidate found so far for the final coded coefficient */
3287     int last_run=0;
3288     int last_level=0;
3289     int last_score= 0;
3290     int last_i;
         /* up to two candidate levels per scan position, and how many are valid */
3291     int coeff[2][64];
3292     int coeff_count[64];
3293     int qmul, qadd, start_i, last_non_zero, i, dc;
3294     const int esc_length= s->ac_esc_length;
3295     uint8_t * length;
3296     uint8_t * last_length;
         /* weight applied to VLC lengths when they are added to the distortion */
3297     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3298
3299     s->dsp.fdct (block);
3300
         /* run the denoiser only when its accumulation table exists */
3301     if(s->dct_error_sum)
3302         s->denoise_dct(s, block);
         /* constants for the FMT_H263 reconstruction alevel*qmul + qadd below */
3303     qmul= qscale*16;
3304     qadd= ((qscale-1)|1)*8;
3305
3306     if (s->mb_intra) {
3307         int q;
3308         if (!s->h263_aic) {
3309             if (n < 4)
3310                 q = s->y_dc_scale;
3311             else
3312                 q = s->c_dc_scale;
3313             q = q << 3;
3314         } else{
3315             /* For AIC we skip quant/dequant of INTRADC */
3316             q = 1 << 3;
3317             qadd=0;
3318         }
3319
3320         /* note: block[0] is assumed to be positive */
             /* DC is quantized here by rounded division; the search below
              * therefore starts at scan position 1 */
3321         block[0] = (block[0] + (q >> 1)) / q;
3322         start_i = 1;
3323         last_non_zero = 0;
3324         qmat = s->q_intra_matrix[qscale];
3325         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3326             bias= 1<<(QMAT_SHIFT-1);
3327         length     = s->intra_ac_vlc_length;
3328         last_length= s->intra_ac_vlc_last_length;
3329     } else {
3330         start_i = 0;
3331         last_non_zero = -1;
3332         qmat = s->q_inter_matrix[qscale];
3333         length     = s->inter_ac_vlc_length;
3334         last_length= s->inter_ac_vlc_last_length;
3335     }
3336     last_i= start_i;
3337
         /* A product `level` with |level| <= threshold1 gives
          * (bias + |level|) >> QMAT_SHIFT == 0; the single unsigned compare
          * (unsigned)(level+threshold1) > threshold2 tests for |level| > threshold1. */
3338     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3339     threshold2= (threshold1<<1);
3340
         /* find the last scan position whose coefficient does not quantize to zero */
3341     for(i=63; i>=start_i; i--) {
3342         const int j = scantable[i];
3343         int level = block[j] * qmat[j];
3344
3345         if(((unsigned)(level+threshold1))>threshold2){
3346             last_non_zero = i;
3347             break;
3348         }
3349     }
3350
         /* build the candidate levels for every position up to last_non_zero */
3351     for(i=start_i; i<=last_non_zero; i++) {
3352         const int j = scantable[i];
3353         int level = block[j] * qmat[j];
3354
3355 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3356 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3357         if(((unsigned)(level+threshold1))>threshold2){
                 /* candidates: the quantized level and the one that is one
                  * step closer to zero */
3358             if(level>0){
3359                 level= (bias + level)>>QMAT_SHIFT;
3360                 coeff[0][i]= level;
3361                 coeff[1][i]= level-1;
3362 //                coeff[2][k]= level-2;
3363             }else{
3364                 level= (bias - level)>>QMAT_SHIFT;
3365                 coeff[0][i]= -level;
3366                 coeff[1][i]= -level+1;
3367 //                coeff[2][k]= -level+2;
3368             }
                 /* a magnitude of 1 has only one candidate, since the second would be 0 */
3369             coeff_count[i]= FFMIN(level, 2);
3370             assert(coeff_count[i]);
                 /* OR of all magnitudes; only used for the overflow test below */
3371             max |=level;
3372         }else{
                 /* would quantize to zero: offer a single candidate of +-1
                  * with the sign of the product (assumes a 32-bit int with
                  * arithmetic right shift -- TODO confirm) */
3373             coeff[0][i]= (level>>31)|1;
3374             coeff_count[i]= 1;
3375         }
3376     }
3377
3378     *overflow= s->max_qcoeff < max; //overflow might have happened
3379
         /* nothing to search: clear everything from start_i on and return */
3380     if(last_non_zero < start_i){
3381         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3382         return last_non_zero;
3383     }
3384
3385     score_tab[start_i]= 0;
3386     survivor[0]= start_i;
3387     survivor_count= 1;
3388
         /* main search: for each position, each candidate level and each
          * surviving predecessor, score = distortion + bits*lambda + score of
          * the predecessor */
3389     for(i=start_i; i<=last_non_zero; i++){
3390         int level_index, j, zero_distortion;
3391         int dct_coeff= FFABS(block[ scantable[i] ]);
3392         int best_score=256*256*256*120;
3393
             /* for these forward DCTs the coefficient is rescaled with
              * ff_inv_aanscales before distortion is measured -- presumably
              * because their output is left scaled (TODO confirm) */
3394         if (   s->dsp.fdct == ff_fdct_ifast
3395 #ifndef FAAN_POSTSCALE
3396             || s->dsp.fdct == ff_faandct
3397 #endif
3398            )
3399             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
             /* distortion of coding this coefficient as zero; candidates are
              * scored relative to it */
3400         zero_distortion= dct_coeff*dct_coeff;
3401
3402         for(level_index=0; level_index < coeff_count[i]; level_index++){
3403             int distortion;
3404             int level= coeff[level_index][i];
3405             const int alevel= FFABS(level);
3406             int unquant_coeff;
3407
3408             assert(level);
3409
                 /* reconstruct the magnitude the candidate would produce */
3410             if(s->out_format == FMT_H263){
3411                 unquant_coeff= alevel*qmul + qadd;
3412             }else{ //MPEG1
3413                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3414                 if(s->mb_intra){
3415                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3416                         unquant_coeff =   (unquant_coeff - 1) | 1;
3417                 }else{
3418                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3419                         unquant_coeff =   (unquant_coeff - 1) | 1;
3420                 }
3421                 unquant_coeff<<= 3;
3422             }
3423
3424             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* offset by 64 so the level can index the VLC length tables;
                  * values outside 0..127 are charged the escape length instead */
3425             level+=64;
3426             if((level&(~127)) == 0){
3427                 for(j=survivor_count-1; j>=0; j--){
3428                     int run= i - survivor[j];
3429                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3430                     score += score_tab[i-run];
3431
3432                     if(score < best_score){
3433                         best_score= score;
3434                         run_tab[i+1]= run;
3435                         level_tab[i+1]= level-64;
3436                     }
3437                 }
3438
                     /* FMT_H263 uses separate lengths for the last coefficient, so
                      * the best "this one is last" choice is tracked as well */
3439                 if(s->out_format == FMT_H263){
3440                     for(j=survivor_count-1; j>=0; j--){
3441                         int run= i - survivor[j];
3442                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3443                         score += score_tab[i-run];
3444                         if(score < last_score){
3445                             last_score= score;
3446                             last_run= run;
3447                             last_level= level-64;
3448                             last_i= i+1;
3449                         }
3450                     }
3451                 }
3452             }else{
                     /* escape-coded level: the bit cost does not depend on the run */
3453                 distortion += esc_length*lambda;
3454                 for(j=survivor_count-1; j>=0; j--){
3455                     int run= i - survivor[j];
3456                     int score= distortion + score_tab[i-run];
3457
3458                     if(score < best_score){
3459                         best_score= score;
3460                         run_tab[i+1]= run;
3461                         level_tab[i+1]= level-64;
3462                     }
3463                 }
3464
3465                 if(s->out_format == FMT_H263){
3466                   for(j=survivor_count-1; j>=0; j--){
3467                         int run= i - survivor[j];
3468                         int score= distortion + score_tab[i-run];
3469                         if(score < last_score){
3470                             last_score= score;
3471                             last_run= run;
3472                             last_level= level-64;
3473                             last_i= i+1;
3474                         }
3475                     }
3476                 }
3477             }
3478         }
3479
3480         score_tab[i+1]= best_score;
3481
             /* drop survivors whose score is worse than the new entry; for
              * longer blocks a slack of lambda is allowed */
3482         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3483         if(last_non_zero <= 27){
3484             for(; survivor_count; survivor_count--){
3485                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3486                     break;
3487             }
3488         }else{
3489             for(; survivor_count; survivor_count--){
3490                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3491                     break;
3492             }
3493         }
3494
3495         survivor[ survivor_count++ ]= i+1;
3496     }
3497
         /* other formats: choose the end position afterwards, charging
          * lambda*2 for every end position other than 0 */
3498     if(s->out_format != FMT_H263){
3499         last_score= 256*256*256*120;
3500         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3501             int score= score_tab[i];
3502             if(i) score += lambda*2; //FIXME exacter?
3503
3504             if(score < last_score){
3505                 last_score= score;
3506                 last_i= i;
3507                 last_level= level_tab[i];
3508                 last_run= run_tab[i];
3509             }
3510         }
3511     }
3512
3513     s->coded_score[n] = last_score;
3514
         /* remember |block[0]| before the searched part of the block is cleared */
3515     dc= FFABS(block[0]);
3516     last_non_zero= last_i - 1;
3517     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3518
3519     if(last_non_zero < start_i)
3520         return last_non_zero;
3521
         /* special case: only the coefficient at position 0 of a block searched
          * from position 0 is left; re-decide its level on its own, with 0 as
          * an option */
3522     if(last_non_zero == 0 && start_i == 0){
3523         int best_level= 0;
3524         int best_score= dc * dc;
3525
3526         for(i=0; i<coeff_count[0]; i++){
3527             int level= coeff[i][0];
3528             int alevel= FFABS(level);
3529             int unquant_coeff, score, distortion;
3530
3531             if(s->out_format == FMT_H263){
3532                     unquant_coeff= (alevel*qmul + qadd)>>3;
3533             }else{ //MPEG1
3534                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3535                     unquant_coeff =   (unquant_coeff - 1) | 1;
3536             }
3537             unquant_coeff = (unquant_coeff + 4) >> 3;
3538             unquant_coeff<<= 3 + 3;
3539
3540             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3541             level+=64;
3542             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3543             else                    score= distortion + esc_length*lambda;
3544
3545             if(score < best_score){
3546                 best_score= score;
3547                 best_level= level - 64;
3548             }
3549         }
3550         block[0]= best_level;
3551         s->coded_score[n] = best_score - dc*dc;
3552         if(best_level == 0) return -1;
3553         else                return last_non_zero;
3554     }
3555
         /* write the chosen last coefficient, then walk the run/level chain
          * backwards to fill in the earlier ones */
3556     i= last_i;
3557     assert(last_level);
3558
3559     block[ perm_scantable[last_non_zero] ]= last_level;
3560     i -= last_run + 1;
3561
3562     for(; i>start_i; i -= run_tab[i] + 1){
3563         block[ perm_scantable[i-1] ]= level_tab[i];
3564     }
3565
3566     return last_non_zero;
3567 }
3568
3569 //#define REFINE_STATS 1
3570 static int16_t basis[64][64];
3571
3572 static void build_basis(uint8_t *perm){
3573     int i, j, x, y;
3574     emms_c();
3575     for(i=0; i<8; i++){
3576         for(j=0; j<8; j++){
3577             for(y=0; y<8; y++){
3578                 for(x=0; x<8; x++){
3579                     double s= 0.25*(1<<BASIS_SHIFT);
3580                     int index= 8*i + j;
3581                     int perm_index= perm[index];
3582                     if(i==0) s*= sqrt(0.5);
3583                     if(j==0) s*= sqrt(0.5);
3584                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3585                 }
3586             }
3587         }
3588     }
3589 }
3590
3591 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3592                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3593                         int n, int qscale){
3594     int16_t rem[64];
3595     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3596     const uint8_t *scantable= s->intra_scantable.scantable;
3597     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3598 //    unsigned int threshold1, threshold2;
3599 //    int bias=0;
3600     int run_tab[65];
3601     int prev_run=0;
3602     int prev_level=0;
3603     int qmul, qadd, start_i, last_non_zero, i, dc;
3604     uint8_t * length;
3605     uint8_t * last_length;
3606     int lambda;
3607     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3608 #ifdef REFINE_STATS
3609 static int count=0;
3610 static int after_last=0;
3611 static int to_zero=0;
3612 static int from_zero=0;
3613 static int raise=0;
3614 static int lower=0;
3615 static int messed_sign=0;
3616 #endif
3617
3618     if(basis[0][0] == 0)
3619         build_basis(s->dsp.idct_permutation);
3620
3621     qmul= qscale*2;
3622     qadd= (qscale-1)|1;
3623     if (s->mb_intra) {
3624         if (!s->h263_aic) {
3625             if (n < 4)
3626                 q = s->y_dc_scale;
3627             else
3628                 q = s->c_dc_scale;
3629         } else{
3630             /* For AIC we skip quant/dequant of INTRADC */
3631             q = 1;
3632             qadd=0;
3633         }
3634         q <<= RECON_SHIFT-3;
3635         /* note: block[0] is assumed to be positive */
3636         dc= block[0]*q;
3637 //        block[0] = (block[0] + (q >> 1)) / q;
3638         start_i = 1;
3639 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3640 //            bias= 1<<(QMAT_SHIFT-1);
3641         length     = s->intra_ac_vlc_length;
3642         last_length= s->intra_ac_vlc_last_length;
3643     } else {
3644         dc= 0;
3645         start_i = 0;
3646         length     = s->inter_ac_vlc_length;
3647         last_length= s->inter_ac_vlc_last_length;
3648     }
3649     last_non_zero = s->block_last_index[n];
3650
3651 #ifdef REFINE_STATS
3652 {START_TIMER
3653 #endif
3654     dc += (1<<(RECON_SHIFT-1));
3655     for(i=0; i<64; i++){
3656         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3657     }
3658 #ifdef REFINE_STATS
3659 STOP_TIMER("memset rem[]")}
3660 #endif
3661     sum=0;
3662     for(i=0; i<64; i++){
3663         int one= 36;
3664         int qns=4;
3665         int w;
3666
3667         w= FFABS(weight[i]) + qns*one;
3668         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3669
3670         weight[i] = w;
3671 //        w=weight[i] = (63*qns + (w/2)) / w;
3672
3673         assert(w>0);
3674         assert(w<(1<<6));
3675         sum += w*w;
3676     }
3677     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3678 #ifdef REFINE_STATS
3679 {START_TIMER
3680 #endif
3681     run=0;
3682     rle_index=0;
3683     for(i=start_i; i<=last_non_zero; i++){
3684         int j= perm_scantable[i];
3685         const int level= block[j];
3686         int coeff;
3687
3688         if(level){
3689             if(level<0) coeff= qmul*level - qadd;
3690             else        coeff= qmul*level + qadd;
3691             run_tab[rle_index++]=run;
3692             run=0;
3693
3694             s->dsp.add_8x8basis(rem, basis[j], coeff);
3695         }else{
3696             run++;
3697         }
3698     }
3699 #ifdef REFINE_STATS
3700 if(last_non_zero>0){
3701 STOP_TIMER("init rem[]")
3702 }
3703 }
3704
3705 {START_TIMER
3706 #endif
3707     for(;;){
3708         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3709         int best_coeff=0;
3710         int best_change=0;
3711         int run2, best_unquant_change=0, analyze_gradient;
3712 #ifdef REFINE_STATS
3713 {START_TIMER
3714 #endif
3715         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
3716
3717         if(analyze_gradient){
3718 #ifdef REFINE_STATS
3719 {START_TIMER
3720 #endif
3721             for(i=0; i<64; i++){
3722                 int w= weight[i];
3723
3724                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3725             }
3726 #ifdef REFINE_STATS
3727 STOP_TIMER("rem*w*w")}
3728 {START_TIMER
3729 #endif
3730             s->dsp.fdct(d1);
3731 #ifdef REFINE_STATS
3732 STOP_TIMER("dct")}
3733 #endif
3734         }
3735
3736         if(start_i){
3737             const int level= block[0];
3738             int change, old_coeff;
3739
3740             assert(s->mb_intra);
3741
3742             old_coeff= q*level;
3743
3744             for(change=-1; change<=1; change+=2){
3745                 int new_level= level + change;
3746                 int score, new_coeff;
3747
3748                 new_coeff= q*new_level;
3749                 if(new_coeff >= 2048 || new_coeff < 0)
3750                     continue;
3751
3752                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3753                 if(score<best_score){
3754                     best_score= score;
3755                     best_coeff= 0;
3756                     best_change= change;
3757                     best_unquant_change= new_coeff - old_coeff;
3758                 }
3759             }
3760         }
3761
3762         run=0;
3763         rle_index=0;
3764         run2= run_tab[rle_index++];
3765         prev_level=0;
3766         prev_run=0;
3767
3768         for(i=start_i; i<64; i++){
3769             int j= perm_scantable[i];
3770             const int level= block[j];
3771             int change, old_coeff;
3772
3773             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3774                 break;
3775
3776             if(level){
3777                 if(level<0) old_coeff= qmul*level - qadd;
3778                 else        old_coeff= qmul*level + qadd;
3779                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3780             }else{
3781                 old_coeff=0;
3782                 run2--;
3783                 assert(run2>=0 || i >= last_non_zero );
3784             }
3785
3786             for(change=-1; change<=1; change+=2){
3787                 int new_level= level + change;
3788                 int score, new_coeff, unquant_change;
3789
3790                 score=0;
3791                 if(s->avctx->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3792                    continue;
3793
3794                 if(new_level){
3795                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3796                     else            new_coeff= qmul*new_level + qadd;
3797                     if(new_coeff >= 2048 || new_coeff <= -2048)
3798                         continue;
3799                     //FIXME check for overflow
3800
3801                     if(level){
3802                         if(level < 63 && level > -63){
3803                             if(i < last_non_zero)
3804                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3805                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3806                             else
3807                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3808                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3809                         }
3810                     }else{
3811                         assert(FFABS(new_level)==1);
3812
3813                         if(analyze_gradient){
3814                             int g= d1[ scantable[i] ];
3815                             if(g && (g^new_level) >= 0)
3816                                 continue;
3817                         }
3818
3819                         if(i < last_non_zero){
3820                             int next_i= i + run2 + 1;
3821                             int next_level= block[ perm_scantable[next_i] ] + 64;
3822
3823                             if(next_level&(~127))
3824                                 next_level= 0;
3825
3826                             if(next_i < last_non_zero)
3827                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3828                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3829                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3830                             else
3831                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3832                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3833                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3834                         }else{
3835                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3836                             if(prev_level){
3837                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3838                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3839                             }
3840                         }
3841                     }
3842                 }else{
3843                     new_coeff=0;
3844                     assert(FFABS(level)==1);
3845
3846                     if(i < last_non_zero){
3847                         int next_i= i + run2 + 1;
3848                         int next_level= block[ perm_scantable[next_i] ] + 64;
3849
3850                         if(next_level&(~127))
3851                             next_level= 0;
3852
3853                         if(next_i < last_non_zero)
3854                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3855                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3856                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3857                         else
3858                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3859                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3860                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3861                     }else{
3862                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3863                         if(prev_level){
3864                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3865                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3866                         }
3867                     }
3868                 }
3869
3870                 score *= lambda;
3871
3872                 unquant_change= new_coeff - old_coeff;
3873                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3874
3875                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3876                 if(score<best_score){
3877                     best_score= score;
3878                     best_coeff= i;
3879                     best_change= change;
3880                     best_unquant_change= unquant_change;
3881                 }
3882             }
3883             if(level){
3884                 prev_level= level + 64;
3885                 if(prev_level&(~127))
3886                     prev_level= 0;
3887                 prev_run= run;
3888                 run=0;
3889             }else{
3890                 run++;
3891             }
3892         }
3893 #ifdef REFINE_STATS
3894 STOP_TIMER("iterative step")}
3895 #endif
3896
3897         if(best_change){
3898             int j= perm_scantable[ best_coeff ];
3899
3900             block[j] += best_change;
3901
3902             if(best_coeff > last_non_zero){
3903                 last_non_zero= best_coeff;
3904                 assert(block[j]);
3905 #ifdef REFINE_STATS
3906 after_last++;
3907 #endif
3908             }else{
3909 #ifdef REFINE_STATS
3910 if(block[j]){
3911     if(block[j] - best_change){
3912         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3913             raise++;
3914         }else{
3915             lower++;
3916         }
3917     }else{
3918         from_zero++;
3919     }
3920 }else{
3921     to_zero++;
3922 }
3923 #endif
3924                 for(; last_non_zero>=start_i; last_non_zero--){
3925                     if(block[perm_scantable[last_non_zero]])
3926                         break;
3927                 }
3928             }
3929 #ifdef REFINE_STATS
3930 count++;
3931 if(256*256*256*64 % count == 0){
3932     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
3933 }
3934 #endif
3935             run=0;
3936             rle_index=0;
3937             for(i=start_i; i<=last_non_zero; i++){
3938                 int j= perm_scantable[i];
3939                 const int level= block[j];
3940
3941                  if(level){
3942                      run_tab[rle_index++]=run;
3943                      run=0;
3944                  }else{
3945                      run++;
3946                  }
3947             }
3948
3949             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
3950         }else{
3951             break;
3952         }
3953     }
3954 #ifdef REFINE_STATS
3955 if(last_non_zero>0){
3956 STOP_TIMER("iterative search")
3957 }
3958 }
3959 #endif
3960
3961     return last_non_zero;
3962 }
3963
3964 int ff_dct_quantize_c(MpegEncContext *s,
3965                         DCTELEM *block, int n,
3966                         int qscale, int *overflow)
3967 {
3968     int i, j, level, last_non_zero, q, start_i;
3969     const int *qmat;
3970     const uint8_t *scantable= s->intra_scantable.scantable;
3971     int bias;
3972     int max=0;
3973     unsigned int threshold1, threshold2;
3974
3975     s->dsp.fdct (block);
3976
3977     if(s->dct_error_sum)
3978         s->denoise_dct(s, block);
3979
3980     if (s->mb_intra) {
3981         if (!s->h263_aic) {
3982             if (n < 4)
3983                 q = s->y_dc_scale;
3984             else
3985                 q = s->c_dc_scale;
3986             q = q << 3;
3987         } else
3988             /* For AIC we skip quant/dequant of INTRADC */
3989             q = 1 << 3;
3990
3991         /* note: block[0] is assumed to be positive */
3992         block[0] = (block[0] + (q >> 1)) / q;
3993         start_i = 1;
3994         last_non_zero = 0;
3995         qmat = s->q_intra_matrix[qscale];
3996         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
3997     } else {
3998         start_i = 0;
3999         last_non_zero = -1;
4000         qmat = s->q_inter_matrix[qscale];
4001         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4002     }
4003     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4004     threshold2= (threshold1<<1);
4005     for(i=63;i>=start_i;i--) {
4006         j = scantable[i];
4007         level = block[j] * qmat[j];
4008
4009         if(((unsigned)(level+threshold1))>threshold2){
4010             last_non_zero = i;
4011             break;
4012         }else{
4013             block[j]=0;
4014         }
4015     }
4016     for(i=start_i; i<=last_non_zero; i++) {
4017         j = scantable[i];
4018         level = block[j] * qmat[j];
4019
4020 //        if(   bias+level >= (1<<QMAT_SHIFT)
4021 //           || bias-level >= (1<<QMAT_SHIFT)){
4022         if(((unsigned)(level+threshold1))>threshold2){
4023             if(level>0){
4024                 level= (bias + level)>>QMAT_SHIFT;
4025                 block[j]= level;
4026             }else{
4027                 level= (bias - level)>>QMAT_SHIFT;
4028                 block[j]= -level;
4029             }
4030             max |=level;
4031         }else{
4032             block[j]=0;
4033         }
4034     }
4035     *overflow= s->max_qcoeff < max; //overflow might have happened
4036
4037     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4038     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4039         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4040
4041     return last_non_zero;
4042 }
4043
/* Byte offset of member x inside MpegEncContext, for the AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flag set shared by the option tables below. The expansion is parenthesized
 * so that it stays a single operand wherever the macro is used. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4046 static const AVOption h263_options[] = {
4047     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4048     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4049     { NULL },
4050 };
4051
4052 static const AVClass h263_class = {
4053     .class_name = "H.263 encoder",
4054     .item_name  = av_default_item_name,
4055     .option     = h263_options,
4056     .version    = LIBAVUTIL_VERSION_INT,
4057 };
4058
4059 AVCodec ff_h263_encoder = {
4060     .name           = "h263",
4061     .type           = AVMEDIA_TYPE_VIDEO,
4062     .id             = CODEC_ID_H263,
4063     .priv_data_size = sizeof(MpegEncContext),
4064     .init           = ff_MPV_encode_init,
4065     .encode2        = ff_MPV_encode_picture,
4066     .close          = ff_MPV_encode_end,
4067     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4068     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4069     .priv_class     = &h263_class,
4070 };
4071
4072 static const AVOption h263p_options[] = {
4073     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4074     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4075     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4076     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4077     { NULL },
4078 };
4079 static const AVClass h263p_class = {
4080     .class_name = "H.263p encoder",
4081     .item_name  = av_default_item_name,
4082     .option     = h263p_options,
4083     .version    = LIBAVUTIL_VERSION_INT,
4084 };
4085
4086 AVCodec ff_h263p_encoder = {
4087     .name           = "h263p",
4088     .type           = AVMEDIA_TYPE_VIDEO,
4089     .id             = CODEC_ID_H263P,
4090     .priv_data_size = sizeof(MpegEncContext),
4091     .init           = ff_MPV_encode_init,
4092     .encode2        = ff_MPV_encode_picture,
4093     .close          = ff_MPV_encode_end,
4094     .capabilities = CODEC_CAP_SLICE_THREADS,
4095     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4096     .long_name= NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4097     .priv_class     = &h263p_class,
4098 };
4099
4100 AVCodec ff_msmpeg4v2_encoder = {
4101     .name           = "msmpeg4v2",
4102     .type           = AVMEDIA_TYPE_VIDEO,
4103     .id             = CODEC_ID_MSMPEG4V2,
4104     .priv_data_size = sizeof(MpegEncContext),
4105     .init           = ff_MPV_encode_init,
4106     .encode2        = ff_MPV_encode_picture,
4107     .close          = ff_MPV_encode_end,
4108     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4109     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4110 };
4111
4112 AVCodec ff_msmpeg4v3_encoder = {
4113     .name           = "msmpeg4",
4114     .type           = AVMEDIA_TYPE_VIDEO,
4115     .id             = CODEC_ID_MSMPEG4V3,
4116     .priv_data_size = sizeof(MpegEncContext),
4117     .init           = ff_MPV_encode_init,
4118     .encode2        = ff_MPV_encode_picture,
4119     .close          = ff_MPV_encode_end,
4120     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4121     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4122 };
4123
4124 AVCodec ff_wmv1_encoder = {
4125     .name           = "wmv1",
4126     .type           = AVMEDIA_TYPE_VIDEO,
4127     .id             = CODEC_ID_WMV1,
4128     .priv_data_size = sizeof(MpegEncContext),
4129     .init           = ff_MPV_encode_init,
4130     .encode2        = ff_MPV_encode_picture,
4131     .close          = ff_MPV_encode_end,
4132     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4133     .long_name= NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4134 };