git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of Libav.
   9  *
  10  * Libav is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * Libav is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with Libav; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include "libavutil/intmath.h"
  31 #include "libavutil/mathematics.h"
  32 #include "libavutil/opt.h"
  33 #include "avcodec.h"
  34 #include "dsputil.h"
  35 #include "mpegvideo.h"
  36 #include "h263.h"
  37 #include "mathops.h"
  38 #include "mjpegenc.h"
  39 #include "msmpeg4.h"
  40 #include "faandct.h"
  41 #include "thread.h"
  42 #include "aandcttab.h"
  43 #include "flv.h"
  44 #include "mpeg4video.h"
  45 #include "internal.h"
  46 #include "bytestream.h"
  47 #include <limits.h>
  48
  49 //#undef NDEBUG
  50 //#include <assert.h>
  51
  52 static int encode_picture(MpegEncContext *s, int picture_number);
  53 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
  54 static int sse_mb(MpegEncContext *s);
  55 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
  56 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
  57
  58 /* enable all paranoid tests for rounding, overflows, etc... */
  59 //#define PARANOID
  60
  61 //#define DEBUG
  62
/* File-scope default tables; MPV_encode_defaults() installs them into the
 * encoder context as s->me.mv_penalty and s->fcode_tab respectively. */
  63 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
  64 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
  65
/* Option table made of the shared FF_MPV_COMMON_OPTS entries followed by the
 * { NULL } terminator entry. */
  66 const AVOption ff_mpv_generic_options[] = {
  67     FF_MPV_COMMON_OPTS
  68     { NULL },
  69 };
  70
  71 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
  72                        uint16_t (*qmat16)[2][64],
  73                        const uint16_t *quant_matrix,
  74                        int bias, int qmin, int qmax, int intra)
  75 {
  76     int qscale;
  77     int shift = 0;
  78
  79     for (qscale = qmin; qscale <= qmax; qscale++) {
  80         int i;
  81         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
  82             dsp->fdct == ff_jpeg_fdct_islow_10 ||
  83             dsp->fdct == ff_faandct) {
  84             for (i = 0; i < 64; i++) {
  85                 const int j = dsp->idct_permutation[i];
  86                 /* 16 <= qscale * quant_matrix[i] <= 7905
  87                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
  88                  *             19952 <=              x  <= 249205026
  89                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
  90                  *           3444240 >= (1 << 36) / (x) >= 275 */
  91
  92                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
  93                                         (qscale * quant_matrix[j]));
  94             }
  95         } else if (dsp->fdct == ff_fdct_ifast) {
  96             for (i = 0; i < 64; i++) {
  97                 const int j = dsp->idct_permutation[i];
  98                 /* 16 <= qscale * quant_matrix[i] <= 7905
  99                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 100                  *             19952 <=              x  <= 249205026
 101                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 102                  *           3444240 >= (1 << 36) / (x) >= 275 */
 103
 104                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
 105                                         (ff_aanscales[i] * qscale *
 106                                          quant_matrix[j]));
 107             }
 108         } else {
 109             for (i = 0; i < 64; i++) {
 110                 const int j = dsp->idct_permutation[i];
 111                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 112                  * Assume x = qscale * quant_matrix[i]
 113                  * So             16 <=              x  <= 7905
 114                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 115                  * so          32768 >= (1 << 19) / (x) >= 67 */
 116                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
 117                                         (qscale * quant_matrix[j]));
 118                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 119                 //                    (qscale * quant_matrix[i]);
 120                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
 121                                        (qscale * quant_matrix[j]);
 122
 123                 if (qmat16[qscale][0][i] == 0 ||
 124                     qmat16[qscale][0][i] == 128 * 256)
 125                     qmat16[qscale][0][i] = 128 * 256 - 1;
 126                 qmat16[qscale][1][i] =
 127                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 128                                 qmat16[qscale][0][i]);
 129             }
 130         }
 131
 132         for (i = intra; i < 64; i++) {
 133             int64_t max = 8191;
 134             if (dsp->fdct == ff_fdct_ifast) {
 135                 max = (8191LL * ff_aanscales[i]) >> 14;
 136             }
 137             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 138                 shift++;
 139             }
 140         }
 141     }
 142     if (shift) {
 143         av_log(NULL, AV_LOG_INFO,
 144                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 145                QMAT_SHIFT - shift);
 146     }
 147 }
 148
 149 static inline void update_qscale(MpegEncContext *s)
 150 {
 151     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 152                 (FF_LAMBDA_SHIFT + 7);
 153     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 154
 155     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 156                  FF_LAMBDA_SHIFT;
 157 }
 158
 159 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 160 {
 161     int i;
 162
 163     if (matrix) {
 164         put_bits(pb, 1, 1);
 165         for (i = 0; i < 64; i++) {
 166             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 167         }
 168     } else
 169         put_bits(pb, 1, 0);
 170 }
 171
 172 /**
 173  * init s->current_picture.qscale_table from s->lambda_table
 174  */
 175 void ff_init_qscale_tab(MpegEncContext *s)
 176 {
 177     int8_t * const qscale_table = s->current_picture.f.qscale_table;
 178     int i;
 179
 180     for (i = 0; i < s->mb_num; i++) {
 181         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 182         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 183         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 184                                                   s->avctx->qmax);
 185     }
 186 }
 187
 188 static void copy_picture_attributes(MpegEncContext *s,
 189                                     AVFrame *dst,
 190                                     AVFrame *src)
 191 {
 192     int i;
 193
 194     dst->pict_type              = src->pict_type;
 195     dst->quality                = src->quality;
 196     dst->coded_picture_number   = src->coded_picture_number;
 197     dst->display_picture_number = src->display_picture_number;
 198     //dst->reference              = src->reference;
 199     dst->pts                    = src->pts;
 200     dst->interlaced_frame       = src->interlaced_frame;
 201     dst->top_field_first        = src->top_field_first;
 202
 203     if (s->avctx->me_threshold) {
 204         if (!src->motion_val[0])
 205             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
 206         if (!src->mb_type)
 207             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
 208         if (!src->ref_index[0])
 209             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
 210         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
 211             av_log(s->avctx, AV_LOG_ERROR,
 212                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
 213                    src->motion_subsample_log2, dst->motion_subsample_log2);
 214
 215         memcpy(dst->mb_type, src->mb_type,
 216                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
 217
 218         for (i = 0; i < 2; i++) {
 219             int stride = ((16 * s->mb_width ) >>
 220                           src->motion_subsample_log2) + 1;
 221             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
 222
 223             if (src->motion_val[i] &&
 224                 src->motion_val[i] != dst->motion_val[i]) {
 225                 memcpy(dst->motion_val[i], src->motion_val[i],
 226                        2 * stride * height * sizeof(int16_t));
 227             }
 228             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
 229                 memcpy(dst->ref_index[i], src->ref_index[i],
 230                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
 231             }
 232         }
 233     }
 234 }
 235
 236 static void update_duplicate_context_after_me(MpegEncContext *dst,
 237                                               MpegEncContext *src)
 238 {
 239 #define COPY(a) dst->a= src->a
 240     COPY(pict_type);
 241     COPY(current_picture);
 242     COPY(f_code);
 243     COPY(b_code);
 244     COPY(qscale);
 245     COPY(lambda);
 246     COPY(lambda2);
 247     COPY(picture_in_gop_number);
 248     COPY(gop_picture_number);
 249     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 250     COPY(progressive_frame);    // FIXME don't set in encode_header
 251     COPY(partitioned_frame);    // FIXME don't set in encode_header
 252 #undef COPY
 253 }
 254
 255 /**
 256  * Set the given MpegEncContext to defaults for encoding.
 257  * the changed fields will not depend upon the prior state of the MpegEncContext.
 258  */
 259 static void MPV_encode_defaults(MpegEncContext *s)
 260 {
 261     int i;
 262     ff_MPV_common_defaults(s);
 263
 264     for (i = -16; i < 16; i++) {
 265         default_fcode_tab[i + MAX_MV] = 1;
 266     }
 267     s->me.mv_penalty = default_mv_penalty;
 268     s->fcode_tab     = default_fcode_tab;
 269 }
 270
 271 /* init video encoder */
 272 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
 273 {
 274     MpegEncContext *s = avctx->priv_data;
 275     int i;
 276     int chroma_h_shift, chroma_v_shift;
 277
 278     MPV_encode_defaults(s);
 279
 280     switch (avctx->codec_id) {
 281     case AV_CODEC_ID_MPEG2VIDEO:
 282         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 283             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 284             av_log(avctx, AV_LOG_ERROR,
 285                    "only YUV420 and YUV422 are supported\n");
 286             return -1;
 287         }
 288         break;
 289     case AV_CODEC_ID_LJPEG:
 290         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
 291             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
 292             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
 293             avctx->pix_fmt != AV_PIX_FMT_BGRA     &&
 294             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 295               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
 296               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
 297              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
 298             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
 299             return -1;
 300         }
 301         break;
 302     case AV_CODEC_ID_MJPEG:
 303         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
 304             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
 305             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 306               avctx->pix_fmt != AV_PIX_FMT_YUV422P) ||
 307              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
 308             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 309             return -1;
 310         }
 311         break;
 312     default:
 313         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 314             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 315             return -1;
 316         }
 317     }
 318
 319     switch (avctx->pix_fmt) {
 320     case AV_PIX_FMT_YUVJ422P:
 321     case AV_PIX_FMT_YUV422P:
 322         s->chroma_format = CHROMA_422;
 323         break;
 324     case AV_PIX_FMT_YUVJ420P:
 325     case AV_PIX_FMT_YUV420P:
 326     default:
 327         s->chroma_format = CHROMA_420;
 328         break;
 329     }
 330
 331     s->bit_rate = avctx->bit_rate;
 332     s->width    = avctx->width;
 333     s->height   = avctx->height;
 334     if (avctx->gop_size > 600 &&
 335         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 336         av_log(avctx, AV_LOG_ERROR,
 337                "Warning keyframe interval too large! reducing it ...\n");
 338         avctx->gop_size = 600;
 339     }
 340     s->gop_size     = avctx->gop_size;
 341     s->avctx        = avctx;
 342     s->flags        = avctx->flags;
 343     s->flags2       = avctx->flags2;
 344     s->max_b_frames = avctx->max_b_frames;
 345     s->codec_id     = avctx->codec->id;
 346 #if FF_API_MPV_GLOBAL_OPTS
 347     if (avctx->luma_elim_threshold)
 348         s->luma_elim_threshold   = avctx->luma_elim_threshold;
 349     if (avctx->chroma_elim_threshold)
 350         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
 351 #endif
 352     s->strict_std_compliance = avctx->strict_std_compliance;
 353     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 354     s->mpeg_quant         = avctx->mpeg_quant;
 355     s->rtp_mode           = !!avctx->rtp_payload_size;
 356     s->intra_dc_precision = avctx->intra_dc_precision;
 357     s->user_specified_pts = AV_NOPTS_VALUE;
 358
 359     if (s->gop_size <= 1) {
 360         s->intra_only = 1;
 361         s->gop_size   = 12;
 362     } else {
 363         s->intra_only = 0;
 364     }
 365
 366     s->me_method = avctx->me_method;
 367
 368     /* Fixed QSCALE */
 369     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 370
 371 #if FF_API_MPV_GLOBAL_OPTS
 372     if (s->flags & CODEC_FLAG_QP_RD)
 373         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
 374 #endif
 375
 376     s->adaptive_quant = (s->avctx->lumi_masking ||
 377                          s->avctx->dark_masking ||
 378                          s->avctx->temporal_cplx_masking ||
 379                          s->avctx->spatial_cplx_masking  ||
 380                          s->avctx->p_masking      ||
 381                          s->avctx->border_masking ||
 382                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 383                         !s->fixed_qscale;
 384
 385     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
 386
 387     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 388         av_log(avctx, AV_LOG_ERROR,
 389                "a vbv buffer size is needed, "
 390                "for encoding with a maximum bitrate\n");
 391         return -1;
 392     }
 393
 394     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 395         av_log(avctx, AV_LOG_INFO,
 396                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 397     }
 398
 399     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 400         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 401         return -1;
 402     }
 403
 404     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 405         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
 406         return -1;
 407     }
 408
 409     if (avctx->rc_max_rate &&
 410         avctx->rc_max_rate == avctx->bit_rate &&
 411         avctx->rc_max_rate != avctx->rc_min_rate) {
 412         av_log(avctx, AV_LOG_INFO,
 413                "impossible bitrate constraints, this will fail\n");
 414     }
 415
 416     if (avctx->rc_buffer_size &&
 417         avctx->bit_rate * (int64_t)avctx->time_base.num >
 418             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 419         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 420         return -1;
 421     }
 422
 423     if (!s->fixed_qscale &&
 424         avctx->bit_rate * av_q2d(avctx->time_base) >
 425             avctx->bit_rate_tolerance) {
 426         av_log(avctx, AV_LOG_ERROR,
 427                "bitrate tolerance too small for bitrate\n");
 428         return -1;
 429     }
 430
 431     if (s->avctx->rc_max_rate &&
 432         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 433         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 434          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 435         90000LL * (avctx->rc_buffer_size - 1) >
 436             s->avctx->rc_max_rate * 0xFFFFLL) {
 437         av_log(avctx, AV_LOG_INFO,
 438                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 439                "specified vbv buffer is too large for the given bitrate!\n");
 440     }
 441
 442     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
 443         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 444         s->codec_id != AV_CODEC_ID_FLV1) {
 445         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 446         return -1;
 447     }
 448
 449     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 450         av_log(avctx, AV_LOG_ERROR,
 451                "OBMC is only supported with simple mb decision\n");
 452         return -1;
 453     }
 454
 455     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 456         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 457         return -1;
 458     }
 459
 460     if (s->max_b_frames                    &&
 461         s->codec_id != AV_CODEC_ID_MPEG4      &&
 462         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 463         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 464         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 465         return -1;
 466     }
 467
 468     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 469          s->codec_id == AV_CODEC_ID_H263  ||
 470          s->codec_id == AV_CODEC_ID_H263P) &&
 471         (avctx->sample_aspect_ratio.num > 255 ||
 472          avctx->sample_aspect_ratio.den > 255)) {
 473         av_log(avctx, AV_LOG_ERROR,
 474                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
 475                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 476         return -1;
 477     }
 478
 479     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 480         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 481         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 482         return -1;
 483     }
 484
 485     // FIXME mpeg2 uses that too
 486     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
 487         av_log(avctx, AV_LOG_ERROR,
 488                "mpeg2 style quantization not supported by codec\n");
 489         return -1;
 490     }
 491
 492 #if FF_API_MPV_GLOBAL_OPTS
 493     if (s->flags & CODEC_FLAG_CBP_RD)
 494         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
 495 #endif
 496
 497     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 498         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 499         return -1;
 500     }
 501
 502     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 503         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 504         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 505         return -1;
 506     }
 507
 508     if (s->avctx->scenechange_threshold < 1000000000 &&
 509         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
 510         av_log(avctx, AV_LOG_ERROR,
 511                "closed gop with scene change detection are not supported yet, "
 512                "set threshold to 1000000000\n");
 513         return -1;
 514     }
 515
 516     if (s->flags & CODEC_FLAG_LOW_DELAY) {
 517         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 518             av_log(avctx, AV_LOG_ERROR,
 519                   "low delay forcing is only available for mpeg2\n");
 520             return -1;
 521         }
 522         if (s->max_b_frames != 0) {
 523             av_log(avctx, AV_LOG_ERROR,
 524                    "b frames cannot be used with low delay\n");
 525             return -1;
 526         }
 527     }
 528
 529     if (s->q_scale_type == 1) {
 530         if (avctx->qmax > 12) {
 531             av_log(avctx, AV_LOG_ERROR,
 532                    "non linear quant only supports qmax <= 12 currently\n");
 533             return -1;
 534         }
 535     }
 536
 537     if (s->avctx->thread_count > 1         &&
 538         s->codec_id != AV_CODEC_ID_MPEG4      &&
 539         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 540         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 541         (s->codec_id != AV_CODEC_ID_H263P)) {
 542         av_log(avctx, AV_LOG_ERROR,
 543                "multi threaded encoding not supported by codec\n");
 544         return -1;
 545     }
 546
 547     if (s->avctx->thread_count < 1) {
 548         av_log(avctx, AV_LOG_ERROR,
 549                "automatic thread number detection not supported by codec,"
 550                "patch welcome\n");
 551         return -1;
 552     }
 553
 554     if (s->avctx->thread_count > 1)
 555         s->rtp_mode = 1;
 556
 557     if (!avctx->time_base.den || !avctx->time_base.num) {
 558         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 559         return -1;
 560     }
 561
 562     i = (INT_MAX / 2 + 128) >> 8;
 563     if (avctx->me_threshold >= i) {
 564         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
 565                i - 1);
 566         return -1;
 567     }
 568     if (avctx->mb_threshold >= i) {
 569         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
 570                i - 1);
 571         return -1;
 572     }
 573
 574     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 575         av_log(avctx, AV_LOG_INFO,
 576                "notice: b_frame_strategy only affects the first pass\n");
 577         avctx->b_frame_strategy = 0;
 578     }
 579
 580     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 581     if (i > 1) {
 582         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 583         avctx->time_base.den /= i;
 584         avctx->time_base.num /= i;
 585         //return -1;
 586     }
 587
 588     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 589         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
 590         // (a + x * 3 / 8) / x
 591         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 592         s->inter_quant_bias = 0;
 593     } else {
 594         s->intra_quant_bias = 0;
 595         // (a - x / 4) / x
 596         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 597     }
 598
 599     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 600         s->intra_quant_bias = avctx->intra_quant_bias;
 601     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 602         s->inter_quant_bias = avctx->inter_quant_bias;
 603
 604     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
 605                                   &chroma_v_shift);
 606
 607     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 608         s->avctx->time_base.den > (1 << 16) - 1) {
 609         av_log(avctx, AV_LOG_ERROR,
 610                "timebase %d/%d not supported by MPEG 4 standard, "
 611                "the maximum admitted value for the timebase denominator "
 612                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 613                (1 << 16) - 1);
 614         return -1;
 615     }
 616     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 617
 618 #if FF_API_MPV_GLOBAL_OPTS
 619     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
 620         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
 621     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
 622         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
 623     if (avctx->quantizer_noise_shaping)
 624         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
 625 #endif
 626
 627     switch (avctx->codec->id) {
 628     case AV_CODEC_ID_MPEG1VIDEO:
 629         s->out_format = FMT_MPEG1;
 630         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 631         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 632         break;
 633     case AV_CODEC_ID_MPEG2VIDEO:
 634         s->out_format = FMT_MPEG1;
 635         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 636         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 637         s->rtp_mode   = 1;
 638         break;
 639     case AV_CODEC_ID_LJPEG:
 640     case AV_CODEC_ID_MJPEG:
 641         s->out_format = FMT_MJPEG;
 642         s->intra_only = 1; /* force intra only for jpeg */
 643         if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
 644             avctx->pix_fmt   == AV_PIX_FMT_BGRA) {
 645             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
 646             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
 647             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
 648         } else {
 649             s->mjpeg_vsample[0] = 2;
 650             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
 651             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
 652             s->mjpeg_hsample[0] = 2;
 653             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
 654             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
 655         }
 656         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
 657             ff_mjpeg_encode_init(s) < 0)
 658             return -1;
 659         avctx->delay = 0;
 660         s->low_delay = 1;
 661         break;
 662     case AV_CODEC_ID_H261:
 663         if (!CONFIG_H261_ENCODER)
 664             return -1;
 665         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 666             av_log(avctx, AV_LOG_ERROR,
 667                    "The specified picture size of %dx%d is not valid for the "
 668                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 669                     s->width, s->height);
 670             return -1;
 671         }
 672         s->out_format = FMT_H261;
 673         avctx->delay  = 0;
 674         s->low_delay  = 1;
 675         break;
 676     case AV_CODEC_ID_H263:
 677         if (!CONFIG_H263_ENCODER)
 678         return -1;
 679         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 680                              s->width, s->height) == 8) {
 681             av_log(avctx, AV_LOG_INFO,
 682                    "The specified picture size of %dx%d is not valid for "
 683                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 684                    "352x288, 704x576, and 1408x1152."
 685                    "Try H.263+.\n", s->width, s->height);
 686             return -1;
 687         }
 688         s->out_format = FMT_H263;
 689         avctx->delay  = 0;
 690         s->low_delay  = 1;
 691         break;
 692     case AV_CODEC_ID_H263P:
 693         s->out_format = FMT_H263;
 694         s->h263_plus  = 1;
 695         /* Fx */
 696         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 697         s->modified_quant  = s->h263_aic;
 698         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 699         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 700
 701         /* /Fx */
 702         /* These are just to be sure */
 703         avctx->delay = 0;
 704         s->low_delay = 1;
 705         break;
 706     case AV_CODEC_ID_FLV1:
 707         s->out_format      = FMT_H263;
 708         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 709         s->unrestricted_mv = 1;
 710         s->rtp_mode  = 0; /* don't allow GOB */
 711         avctx->delay = 0;
 712         s->low_delay = 1;
 713         break;
 714     case AV_CODEC_ID_RV10:
 715         s->out_format = FMT_H263;
 716         avctx->delay  = 0;
 717         s->low_delay  = 1;
 718         break;
 719     case AV_CODEC_ID_RV20:
 720         s->out_format      = FMT_H263;
 721         avctx->delay       = 0;
 722         s->low_delay       = 1;
 723         s->modified_quant  = 1;
 724         s->h263_aic        = 1;
 725         s->h263_plus       = 1;
 726         s->loop_filter     = 1;
 727         s->unrestricted_mv = 0;
 728         break;
 729     case AV_CODEC_ID_MPEG4:
 730         s->out_format      = FMT_H263;
 731         s->h263_pred       = 1;
 732         s->unrestricted_mv = 1;
 733         s->low_delay       = s->max_b_frames ? 0 : 1;
 734         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 735         break;
 736     case AV_CODEC_ID_MSMPEG4V2:
 737         s->out_format      = FMT_H263;
 738         s->h263_pred       = 1;
 739         s->unrestricted_mv = 1;
 740         s->msmpeg4_version = 2;
 741         avctx->delay       = 0;
 742         s->low_delay       = 1;
 743         break;
 744     case AV_CODEC_ID_MSMPEG4V3:
 745         s->out_format        = FMT_H263;
 746         s->h263_pred         = 1;
 747         s->unrestricted_mv   = 1;
 748         s->msmpeg4_version   = 3;
 749         s->flipflop_rounding = 1;
 750         avctx->delay         = 0;
 751         s->low_delay         = 1;
 752         break;
 753     case AV_CODEC_ID_WMV1:
 754         s->out_format        = FMT_H263;
 755         s->h263_pred         = 1;
 756         s->unrestricted_mv   = 1;
 757         s->msmpeg4_version   = 4;
 758         s->flipflop_rounding = 1;
 759         avctx->delay         = 0;
 760         s->low_delay         = 1;
 761         break;
 762     case AV_CODEC_ID_WMV2:
 763         s->out_format        = FMT_H263;
 764         s->h263_pred         = 1;
 765         s->unrestricted_mv   = 1;
 766         s->msmpeg4_version   = 5;
 767         s->flipflop_rounding = 1;
 768         avctx->delay         = 0;
 769         s->low_delay         = 1;
 770         break;
 771     default:
 772         return -1;
 773     }
 774
 775     avctx->has_b_frames = !s->low_delay;
 776
 777     s->encoding = 1;
 778
 779     s->progressive_frame    =
 780     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 781                                                 CODEC_FLAG_INTERLACED_ME) ||
 782                                 s->alternate_scan);
 783
 784     /* init */
 785     if (ff_MPV_common_init(s) < 0)
 786         return -1;
 787
 788     if (ARCH_X86)
 789         ff_MPV_encode_init_x86(s);
 790
 791     if (!s->dct_quantize)
 792         s->dct_quantize = ff_dct_quantize_c;
 793     if (!s->denoise_dct)
 794         s->denoise_dct  = denoise_dct_c;
 795     s->fast_dct_quantize = s->dct_quantize;
 796     if (avctx->trellis)
 797         s->dct_quantize  = dct_quantize_trellis_c;
 798
 799     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 800         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 801
 802     s->quant_precision = 5;
 803
 804     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
 805     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
 806
 807     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 808         ff_h261_encode_init(s);
 809     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 810         ff_h263_encode_init(s);
 811     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 812         ff_msmpeg4_encode_init(s);
 813     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 814         && s->out_format == FMT_MPEG1)
 815         ff_mpeg1_encode_init(s);
 816
 817     /* init q matrix */
 818     for (i = 0; i < 64; i++) {
 819         int j = s->dsp.idct_permutation[i];
 820         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 821             s->mpeg_quant) {
 822             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 823             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 824         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 825             s->intra_matrix[j] =
 826             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 827         } else {
 828             /* mpeg1/2 */
 829             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 830             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 831         }
 832         if (s->avctx->intra_matrix)
 833             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 834         if (s->avctx->inter_matrix)
 835             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 836     }
 837
 838     /* precompute matrix */
 839     /* for mjpeg, we do include qscale in the matrix */
 840     if (s->out_format != FMT_MJPEG) {
 841         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
 842                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 843                           31, 1);
 844         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
 845                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 846                           31, 0);
 847     }
 848
 849     if (ff_rate_control_init(s) < 0)
 850         return -1;
 851
 852     return 0;
 853 }
 854
 855 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
 856 {
 857     MpegEncContext *s = avctx->priv_data;
 858
 859     ff_rate_control_uninit(s);
 860
 861     ff_MPV_common_end(s);
 862     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
 863         s->out_format == FMT_MJPEG)
 864         ff_mjpeg_encode_close(s);
 865
 866     av_freep(&avctx->extradata);
 867
 868     return 0;
 869 }
 870
 871 static int get_sae(uint8_t *src, int ref, int stride)
 872 {
 873     int x,y;
 874     int acc = 0;
 875
 876     for (y = 0; y < 16; y++) {
 877         for (x = 0; x < 16; x++) {
 878             acc += FFABS(src[x + y * stride] - ref);
 879         }
 880     }
 881
 882     return acc;
 883 }
 884
 885 static int get_intra_count(MpegEncContext *s, uint8_t *src,
 886                            uint8_t *ref, int stride)
 887 {
 888     int x, y, w, h;
 889     int acc = 0;
 890
 891     w = s->width  & ~15;
 892     h = s->height & ~15;
 893
 894     for (y = 0; y < h; y += 16) {
 895         for (x = 0; x < w; x += 16) {
 896             int offset = x + y * stride;
 897             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
 898                                      16);
 899             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
 900             int sae  = get_sae(src + offset, mean, stride);
 901
 902             acc += sae + 500 < sad;
 903         }
 904     }
 905     return acc;
 906 }
 907
 908
 909 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
 910 {
 911     AVFrame *pic = NULL;
 912     int64_t pts;
 913     int i;
 914     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
 915                                                  (s->low_delay ? 0 : 1);
 916     int direct = 1;
 917
 918     if (pic_arg) {
 919         pts = pic_arg->pts;
 920         pic_arg->display_picture_number = s->input_picture_number++;
 921
 922         if (pts != AV_NOPTS_VALUE) {
 923             if (s->user_specified_pts != AV_NOPTS_VALUE) {
 924                 int64_t time = pts;
 925                 int64_t last = s->user_specified_pts;
 926
 927                 if (time <= last) {
 928                     av_log(s->avctx, AV_LOG_ERROR,
 929                            "Error, Invalid timestamp=%"PRId64", "
 930                            "last=%"PRId64"\n", pts, s->user_specified_pts);
 931                     return -1;
 932                 }
 933
 934                 if (!s->low_delay && pic_arg->display_picture_number == 1)
 935                     s->dts_delta = time - last;
 936             }
 937             s->user_specified_pts = pts;
 938         } else {
 939             if (s->user_specified_pts != AV_NOPTS_VALUE) {
 940                 s->user_specified_pts =
 941                 pts = s->user_specified_pts + 1;
 942                 av_log(s->avctx, AV_LOG_INFO,
 943                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
 944                        pts);
 945             } else {
 946                 pts = pic_arg->display_picture_number;
 947             }
 948         }
 949     }
 950
 951   if (pic_arg) {
 952     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
 953         direct = 0;
 954     if (pic_arg->linesize[0] != s->linesize)
 955         direct = 0;
 956     if (pic_arg->linesize[1] != s->uvlinesize)
 957         direct = 0;
 958     if (pic_arg->linesize[2] != s->uvlinesize)
 959         direct = 0;
 960
 961     av_dlog(s->avctx, "%d %d %d %d\n", pic_arg->linesize[0],
 962             pic_arg->linesize[1], s->linesize, s->uvlinesize);
 963
 964     if (direct) {
 965         i = ff_find_unused_picture(s, 1);
 966         if (i < 0)
 967             return i;
 968
 969         pic = &s->picture[i].f;
 970         pic->reference = 3;
 971
 972         for (i = 0; i < 4; i++) {
 973             pic->data[i]     = pic_arg->data[i];
 974             pic->linesize[i] = pic_arg->linesize[i];
 975         }
 976         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
 977             return -1;
 978         }
 979     } else {
 980         i = ff_find_unused_picture(s, 0);
 981         if (i < 0)
 982             return i;
 983
 984         pic = &s->picture[i].f;
 985         pic->reference = 3;
 986
 987         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
 988             return -1;
 989         }
 990
 991         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
 992             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
 993             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
 994             // empty
 995         } else {
 996             int h_chroma_shift, v_chroma_shift;
 997             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
 998                                           &v_chroma_shift);
 999
1000             for (i = 0; i < 3; i++) {
1001                 int src_stride = pic_arg->linesize[i];
1002                 int dst_stride = i ? s->uvlinesize : s->linesize;
1003                 int h_shift = i ? h_chroma_shift : 0;
1004                 int v_shift = i ? v_chroma_shift : 0;
1005                 int w = s->width  >> h_shift;
1006                 int h = s->height >> v_shift;
1007                 uint8_t *src = pic_arg->data[i];
1008                 uint8_t *dst = pic->data[i];
1009
1010                 if (!s->avctx->rc_buffer_size)
1011                     dst += INPLACE_OFFSET;
1012
1013                 if (src_stride == dst_stride)
1014                     memcpy(dst, src, src_stride * h);
1015                 else {
1016                     while (h--) {
1017                         memcpy(dst, src, w);
1018                         dst += dst_stride;
1019                         src += src_stride;
1020                     }
1021                 }
1022             }
1023         }
1024     }
1025     copy_picture_attributes(s, pic, pic_arg);
1026     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1027   }
1028
1029     /* shift buffer entries */
1030     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1031         s->input_picture[i - 1] = s->input_picture[i];
1032
1033     s->input_picture[encoding_delay] = (Picture*) pic;
1034
1035     return 0;
1036 }
1037
1038 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1039 {
1040     int x, y, plane;
1041     int score = 0;
1042     int64_t score64 = 0;
1043
1044     for (plane = 0; plane < 3; plane++) {
1045         const int stride = p->f.linesize[plane];
1046         const int bw = plane ? 1 : 2;
1047         for (y = 0; y < s->mb_height * bw; y++) {
1048             for (x = 0; x < s->mb_width * bw; x++) {
1049                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1050                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1051                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1052                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1053
1054                 switch (s->avctx->frame_skip_exp) {
1055                 case 0: score    =  FFMAX(score, v);          break;
1056                 case 1: score   += FFABS(v);                  break;
1057                 case 2: score   += v * v;                     break;
1058                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1059                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1060                 }
1061             }
1062         }
1063     }
1064
1065     if (score)
1066         score64 = score;
1067
1068     if (score64 < s->avctx->frame_skip_threshold)
1069         return 1;
1070     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1071         return 1;
1072     return 0;
1073 }
1074
1075 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1076 {
1077     AVPacket pkt = { 0 };
1078     int ret, got_output;
1079
1080     av_init_packet(&pkt);
1081     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1082     if (ret < 0)
1083         return ret;
1084
1085     ret = pkt.size;
1086     av_free_packet(&pkt);
1087     return ret;
1088 }
1089
1090 static int estimate_best_b_count(MpegEncContext *s)
1091 {
1092     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1093     AVCodecContext *c = avcodec_alloc_context3(NULL);
1094     AVFrame input[FF_MAX_B_FRAMES + 2];
1095     const int scale = s->avctx->brd_scale;
1096     int i, j, out_size, p_lambda, b_lambda, lambda2;
1097     int64_t best_rd  = INT64_MAX;
1098     int best_b_count = -1;
1099
1100     assert(scale >= 0 && scale <= 3);
1101
1102     //emms_c();
1103     //s->next_picture_ptr->quality;
1104     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1105     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1106     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1107     if (!b_lambda) // FIXME we should do this somewhere else
1108         b_lambda = p_lambda;
1109     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1110                FF_LAMBDA_SHIFT;
1111
1112     c->width        = s->width  >> scale;
1113     c->height       = s->height >> scale;
1114     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1115                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1116     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1117     c->mb_decision  = s->avctx->mb_decision;
1118     c->me_cmp       = s->avctx->me_cmp;
1119     c->mb_cmp       = s->avctx->mb_cmp;
1120     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1121     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1122     c->time_base    = s->avctx->time_base;
1123     c->max_b_frames = s->max_b_frames;
1124
1125     if (avcodec_open2(c, codec, NULL) < 0)
1126         return -1;
1127
1128     for (i = 0; i < s->max_b_frames + 2; i++) {
1129         int ysize = c->width * c->height;
1130         int csize = (c->width / 2) * (c->height / 2);
1131         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1132                                                 s->next_picture_ptr;
1133
1134         avcodec_get_frame_defaults(&input[i]);
1135         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1136         input[i].data[1]     = input[i].data[0] + ysize;
1137         input[i].data[2]     = input[i].data[1] + csize;
1138         input[i].linesize[0] = c->width;
1139         input[i].linesize[1] =
1140         input[i].linesize[2] = c->width / 2;
1141
1142         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1143             pre_input = *pre_input_ptr;
1144
1145             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1146                 pre_input.f.data[0] += INPLACE_OFFSET;
1147                 pre_input.f.data[1] += INPLACE_OFFSET;
1148                 pre_input.f.data[2] += INPLACE_OFFSET;
1149             }
1150
1151             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1152                                  pre_input.f.data[0], pre_input.f.linesize[0],
1153                                  c->width,      c->height);
1154             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1155                                  pre_input.f.data[1], pre_input.f.linesize[1],
1156                                  c->width >> 1, c->height >> 1);
1157             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1158                                  pre_input.f.data[2], pre_input.f.linesize[2],
1159                                  c->width >> 1, c->height >> 1);
1160         }
1161     }
1162
1163     for (j = 0; j < s->max_b_frames + 1; j++) {
1164         int64_t rd = 0;
1165
1166         if (!s->input_picture[j])
1167             break;
1168
1169         c->error[0] = c->error[1] = c->error[2] = 0;
1170
1171         input[0].pict_type = AV_PICTURE_TYPE_I;
1172         input[0].quality   = 1 * FF_QP2LAMBDA;
1173
1174         out_size = encode_frame(c, &input[0]);
1175
1176         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1177
1178         for (i = 0; i < s->max_b_frames + 1; i++) {
1179             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1180
1181             input[i + 1].pict_type = is_p ?
1182                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1183             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1184
1185             out_size = encode_frame(c, &input[i + 1]);
1186
1187             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1188         }
1189
1190         /* get the delayed frames */
1191         while (out_size) {
1192             out_size = encode_frame(c, NULL);
1193             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1194         }
1195
1196         rd += c->error[0] + c->error[1] + c->error[2];
1197
1198         if (rd < best_rd) {
1199             best_rd = rd;
1200             best_b_count = j;
1201         }
1202     }
1203
1204     avcodec_close(c);
1205     av_freep(&c);
1206
1207     for (i = 0; i < s->max_b_frames + 2; i++) {
1208         av_freep(&input[i].data[0]);
1209     }
1210
1211     return best_b_count;
1212 }
1213
1214 static int select_input_picture(MpegEncContext *s)
1215 {
1216     int i;
1217
1218     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1219         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1220     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1221
1222     /* set next picture type & ordering */
1223     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1224         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1225             s->next_picture_ptr == NULL || s->intra_only) {
1226             s->reordered_input_picture[0] = s->input_picture[0];
1227             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1228             s->reordered_input_picture[0]->f.coded_picture_number =
1229                 s->coded_picture_number++;
1230         } else {
1231             int b_frames;
1232
1233             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1234                 if (s->picture_in_gop_number < s->gop_size &&
1235                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1236                     // FIXME check that te gop check above is +-1 correct
1237                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1238                         for (i = 0; i < 4; i++)
1239                             s->input_picture[0]->f.data[i] = NULL;
1240                         s->input_picture[0]->f.type = 0;
1241                     } else {
1242                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1243                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1244
1245                         s->avctx->release_buffer(s->avctx,
1246                                                  &s->input_picture[0]->f);
1247                     }
1248
1249                     emms_c();
1250                     ff_vbv_update(s, 0);
1251
1252                     goto no_output_pic;
1253                 }
1254             }
1255
1256             if (s->flags & CODEC_FLAG_PASS2) {
1257                 for (i = 0; i < s->max_b_frames + 1; i++) {
1258                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1259
1260                     if (pict_num >= s->rc_context.num_entries)
1261                         break;
1262                     if (!s->input_picture[i]) {
1263                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1264                         break;
1265                     }
1266
1267                     s->input_picture[i]->f.pict_type =
1268                         s->rc_context.entry[pict_num].new_pict_type;
1269                 }
1270             }
1271
1272             if (s->avctx->b_frame_strategy == 0) {
1273                 b_frames = s->max_b_frames;
1274                 while (b_frames && !s->input_picture[b_frames])
1275                     b_frames--;
1276             } else if (s->avctx->b_frame_strategy == 1) {
1277                 for (i = 1; i < s->max_b_frames + 1; i++) {
1278                     if (s->input_picture[i] &&
1279                         s->input_picture[i]->b_frame_score == 0) {
1280                         s->input_picture[i]->b_frame_score =
1281                             get_intra_count(s,
1282                                             s->input_picture[i    ]->f.data[0],
1283                                             s->input_picture[i - 1]->f.data[0],
1284                                             s->linesize) + 1;
1285                     }
1286                 }
1287                 for (i = 0; i < s->max_b_frames + 1; i++) {
1288                     if (s->input_picture[i] == NULL ||
1289                         s->input_picture[i]->b_frame_score - 1 >
1290                             s->mb_num / s->avctx->b_sensitivity)
1291                         break;
1292                 }
1293
1294                 b_frames = FFMAX(0, i - 1);
1295
1296                 /* reset scores */
1297                 for (i = 0; i < b_frames + 1; i++) {
1298                     s->input_picture[i]->b_frame_score = 0;
1299                 }
1300             } else if (s->avctx->b_frame_strategy == 2) {
1301                 b_frames = estimate_best_b_count(s);
1302             } else {
1303                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1304                 b_frames = 0;
1305             }
1306
1307             emms_c();
1308
1309             for (i = b_frames - 1; i >= 0; i--) {
1310                 int type = s->input_picture[i]->f.pict_type;
1311                 if (type && type != AV_PICTURE_TYPE_B)
1312                     b_frames = i;
1313             }
1314             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1315                 b_frames == s->max_b_frames) {
1316                 av_log(s->avctx, AV_LOG_ERROR,
1317                        "warning, too many b frames in a row\n");
1318             }
1319
1320             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1321                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1322                     s->gop_size > s->picture_in_gop_number) {
1323                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1324                 } else {
1325                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1326                         b_frames = 0;
1327                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1328                 }
1329             }
1330
1331             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1332                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1333                 b_frames--;
1334
1335             s->reordered_input_picture[0] = s->input_picture[b_frames];
1336             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1337                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1338             s->reordered_input_picture[0]->f.coded_picture_number =
1339                 s->coded_picture_number++;
1340             for (i = 0; i < b_frames; i++) {
1341                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1342                 s->reordered_input_picture[i + 1]->f.pict_type =
1343                     AV_PICTURE_TYPE_B;
1344                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1345                     s->coded_picture_number++;
1346             }
1347         }
1348     }
1349 no_output_pic:
1350     if (s->reordered_input_picture[0]) {
1351         s->reordered_input_picture[0]->f.reference =
1352            s->reordered_input_picture[0]->f.pict_type !=
1353                AV_PICTURE_TYPE_B ? 3 : 0;
1354
1355         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1356
1357         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1358             s->avctx->rc_buffer_size) {
1359             // input is a shared pix, so we can't modifiy it -> alloc a new
1360             // one & ensure that the shared one is reuseable
1361
1362             Picture *pic;
1363             int i = ff_find_unused_picture(s, 0);
1364             if (i < 0)
1365                 return i;
1366             pic = &s->picture[i];
1367
1368             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1369             if (ff_alloc_picture(s, pic, 0) < 0) {
1370                 return -1;
1371             }
1372
1373             /* mark us unused / free shared pic */
1374             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1375                 s->avctx->release_buffer(s->avctx,
1376                                          &s->reordered_input_picture[0]->f);
1377             for (i = 0; i < 4; i++)
1378                 s->reordered_input_picture[0]->f.data[i] = NULL;
1379             s->reordered_input_picture[0]->f.type = 0;
1380
1381             copy_picture_attributes(s, &pic->f,
1382                                     &s->reordered_input_picture[0]->f);
1383
1384             s->current_picture_ptr = pic;
1385         } else {
1386             // input is not a shared pix -> reuse buffer for current_pix
1387
1388             assert(s->reordered_input_picture[0]->f.type ==
1389                        FF_BUFFER_TYPE_USER ||
1390                    s->reordered_input_picture[0]->f.type ==
1391                        FF_BUFFER_TYPE_INTERNAL);
1392
1393             s->current_picture_ptr = s->reordered_input_picture[0];
1394             for (i = 0; i < 4; i++) {
1395                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1396             }
1397         }
1398         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1399
1400         s->picture_number = s->new_picture.f.display_picture_number;
1401     } else {
1402         memset(&s->new_picture, 0, sizeof(Picture));
1403     }
1404     return 0;
1405 }
1406
1407 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1408                           const AVFrame *pic_arg, int *got_packet)
1409 {
1410     MpegEncContext *s = avctx->priv_data;
1411     int i, stuffing_count, ret;
1412     int context_count = s->slice_context_count;
1413
1414     s->picture_in_gop_number++;
1415
1416     if (load_input_picture(s, pic_arg) < 0)
1417         return -1;
1418
1419     if (select_input_picture(s) < 0) {
1420         return -1;
1421     }
1422
1423     /* output? */
1424     if (s->new_picture.f.data[0]) {
1425         if (!pkt->data &&
1426             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1427             return ret;
1428         if (s->mb_info) {
1429             s->mb_info_ptr = av_packet_new_side_data(pkt,
1430                                  AV_PKT_DATA_H263_MB_INFO,
1431                                  s->mb_width*s->mb_height*12);
1432             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1433         }
1434
1435         for (i = 0; i < context_count; i++) {
1436             int start_y = s->thread_context[i]->start_mb_y;
1437             int   end_y = s->thread_context[i]->  end_mb_y;
1438             int h       = s->mb_height;
1439             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1440             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1441
1442             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1443         }
1444
1445         s->pict_type = s->new_picture.f.pict_type;
1446         //emms_c();
1447         ff_MPV_frame_start(s, avctx);
1448 vbv_retry:
1449         if (encode_picture(s, s->picture_number) < 0)
1450             return -1;
1451
1452         avctx->header_bits = s->header_bits;
1453         avctx->mv_bits     = s->mv_bits;
1454         avctx->misc_bits   = s->misc_bits;
1455         avctx->i_tex_bits  = s->i_tex_bits;
1456         avctx->p_tex_bits  = s->p_tex_bits;
1457         avctx->i_count     = s->i_count;
1458         // FIXME f/b_count in avctx
1459         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1460         avctx->skip_count  = s->skip_count;
1461
1462         ff_MPV_frame_end(s);
1463
1464         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1465             ff_mjpeg_encode_picture_trailer(s);
1466
1467         if (avctx->rc_buffer_size) {
1468             RateControlContext *rcc = &s->rc_context;
1469             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1470
1471             if (put_bits_count(&s->pb) > max_size &&
1472                 s->lambda < s->avctx->lmax) {
1473                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1474                                        (s->qscale + 1) / s->qscale);
1475                 if (s->adaptive_quant) {
1476                     int i;
1477                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1478                         s->lambda_table[i] =
1479                             FFMAX(s->lambda_table[i] + 1,
1480                                   s->lambda_table[i] * (s->qscale + 1) /
1481                                   s->qscale);
1482                 }
1483                 s->mb_skipped = 0;        // done in MPV_frame_start()
1484                 // done in encode_picture() so we must undo it
1485                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1486                     if (s->flipflop_rounding          ||
1487                         s->codec_id == AV_CODEC_ID_H263P ||
1488                         s->codec_id == AV_CODEC_ID_MPEG4)
1489                         s->no_rounding ^= 1;
1490                 }
1491                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1492                     s->time_base       = s->last_time_base;
1493                     s->last_non_b_time = s->time - s->pp_time;
1494                 }
1495                 for (i = 0; i < context_count; i++) {
1496                     PutBitContext *pb = &s->thread_context[i]->pb;
1497                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1498                 }
1499                 goto vbv_retry;
1500             }
1501
1502             assert(s->avctx->rc_max_rate);
1503         }
1504
1505         if (s->flags & CODEC_FLAG_PASS1)
1506             ff_write_pass1_stats(s);
1507
1508         for (i = 0; i < 4; i++) {
1509             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1510             avctx->error[i] += s->current_picture_ptr->f.error[i];
1511         }
1512
1513         if (s->flags & CODEC_FLAG_PASS1)
1514             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1515                    avctx->i_tex_bits + avctx->p_tex_bits ==
1516                        put_bits_count(&s->pb));
1517         flush_put_bits(&s->pb);
1518         s->frame_bits  = put_bits_count(&s->pb);
1519
1520         stuffing_count = ff_vbv_update(s, s->frame_bits);
1521         if (stuffing_count) {
1522             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1523                     stuffing_count + 50) {
1524                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1525                 return -1;
1526             }
1527
1528             switch (s->codec_id) {
1529             case AV_CODEC_ID_MPEG1VIDEO:
1530             case AV_CODEC_ID_MPEG2VIDEO:
1531                 while (stuffing_count--) {
1532                     put_bits(&s->pb, 8, 0);
1533                 }
1534             break;
1535             case AV_CODEC_ID_MPEG4:
1536                 put_bits(&s->pb, 16, 0);
1537                 put_bits(&s->pb, 16, 0x1C3);
1538                 stuffing_count -= 4;
1539                 while (stuffing_count--) {
1540                     put_bits(&s->pb, 8, 0xFF);
1541                 }
1542             break;
1543             default:
1544                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1545             }
1546             flush_put_bits(&s->pb);
1547             s->frame_bits  = put_bits_count(&s->pb);
1548         }
1549
1550         /* update mpeg1/2 vbv_delay for CBR */
1551         if (s->avctx->rc_max_rate                          &&
1552             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1553             s->out_format == FMT_MPEG1                     &&
1554             90000LL * (avctx->rc_buffer_size - 1) <=
1555                 s->avctx->rc_max_rate * 0xFFFFLL) {
1556             int vbv_delay, min_delay;
1557             double inbits  = s->avctx->rc_max_rate *
1558                              av_q2d(s->avctx->time_base);
1559             int    minbits = s->frame_bits - 8 *
1560                              (s->vbv_delay_ptr - s->pb.buf - 1);
1561             double bits    = s->rc_context.buffer_index + minbits - inbits;
1562
1563             if (bits < 0)
1564                 av_log(s->avctx, AV_LOG_ERROR,
1565                        "Internal error, negative bits\n");
1566
1567             assert(s->repeat_first_field == 0);
1568
1569             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1570             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1571                         s->avctx->rc_max_rate;
1572
1573             vbv_delay = FFMAX(vbv_delay, min_delay);
1574
1575             assert(vbv_delay < 0xFFFF);
1576
1577             s->vbv_delay_ptr[0] &= 0xF8;
1578             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1579             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1580             s->vbv_delay_ptr[2] &= 0x07;
1581             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1582             avctx->vbv_delay     = vbv_delay * 300;
1583         }
1584         s->total_bits     += s->frame_bits;
1585         avctx->frame_bits  = s->frame_bits;
1586
1587         pkt->pts = s->current_picture.f.pts;
1588         if (!s->low_delay) {
1589             if (!s->current_picture.f.coded_picture_number)
1590                 pkt->dts = pkt->pts - s->dts_delta;
1591             else
1592                 pkt->dts = s->reordered_pts;
1593             s->reordered_pts = s->input_picture[0]->f.pts;
1594         } else
1595             pkt->dts = pkt->pts;
1596         if (s->current_picture.f.key_frame)
1597             pkt->flags |= AV_PKT_FLAG_KEY;
1598         if (s->mb_info)
1599             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1600     } else {
1601         s->frame_bits = 0;
1602     }
1603     assert((s->frame_bits & 7) == 0);
1604
1605     pkt->size = s->frame_bits / 8;
1606     *got_packet = !!pkt->size;
1607     return 0;
1608 }
1609
1610 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1611                                                 int n, int threshold)
1612 {
1613     static const char tab[64] = {
1614         3, 2, 2, 1, 1, 1, 1, 1,
1615         1, 1, 1, 1, 1, 1, 1, 1,
1616         1, 1, 1, 1, 1, 1, 1, 1,
1617         0, 0, 0, 0, 0, 0, 0, 0,
1618         0, 0, 0, 0, 0, 0, 0, 0,
1619         0, 0, 0, 0, 0, 0, 0, 0,
1620         0, 0, 0, 0, 0, 0, 0, 0,
1621         0, 0, 0, 0, 0, 0, 0, 0
1622     };
1623     int score = 0;
1624     int run = 0;
1625     int i;
1626     DCTELEM *block = s->block[n];
1627     const int last_index = s->block_last_index[n];
1628     int skip_dc;
1629
1630     if (threshold < 0) {
1631         skip_dc = 0;
1632         threshold = -threshold;
1633     } else
1634         skip_dc = 1;
1635
1636     /* Are all we could set to zero already zero? */
1637     if (last_index <= skip_dc - 1)
1638         return;
1639
1640     for (i = 0; i <= last_index; i++) {
1641         const int j = s->intra_scantable.permutated[i];
1642         const int level = FFABS(block[j]);
1643         if (level == 1) {
1644             if (skip_dc && i == 0)
1645                 continue;
1646             score += tab[run];
1647             run = 0;
1648         } else if (level > 1) {
1649             return;
1650         } else {
1651             run++;
1652         }
1653     }
1654     if (score >= threshold)
1655         return;
1656     for (i = skip_dc; i <= last_index; i++) {
1657         const int j = s->intra_scantable.permutated[i];
1658         block[j] = 0;
1659     }
1660     if (block[0])
1661         s->block_last_index[n] = 0;
1662     else
1663         s->block_last_index[n] = -1;
1664 }
1665
1666 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1667                                int last_index)
1668 {
1669     int i;
1670     const int maxlevel = s->max_qcoeff;
1671     const int minlevel = s->min_qcoeff;
1672     int overflow = 0;
1673
1674     if (s->mb_intra) {
1675         i = 1; // skip clipping of intra dc
1676     } else
1677         i = 0;
1678
1679     for (; i <= last_index; i++) {
1680         const int j = s->intra_scantable.permutated[i];
1681         int level = block[j];
1682
1683         if (level > maxlevel) {
1684             level = maxlevel;
1685             overflow++;
1686         } else if (level < minlevel) {
1687             level = minlevel;
1688             overflow++;
1689         }
1690
1691         block[j] = level;
1692     }
1693
1694     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1695         av_log(s->avctx, AV_LOG_INFO,
1696                "warning, clipping %d dct coefficients to %d..%d\n",
1697                overflow, minlevel, maxlevel);
1698 }
1699
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For every pixel the neighbourhood of up to 3x3 samples (clipped to the
 * 8x8 block) is accumulated; the stored weight is
 * 36 * ff_sqrt(count * sum_of_squares - sum * sum) / count.
 *
 * @param weight output table of 64 entries, row-major
 * @param ptr    top-left sample of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total    = 0;
            int total_sq = 0;
            int samples  = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int pix = ptr[nx + ny * stride];

                    total    += pix;
                    total_sq += pix * pix;
                    samples++;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * total_sq - total * total)) / samples;
        }
    }
}
1723
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * The steps, in order: select the quantizer, locate the source pixels (going
 * through edge_emu_buffer when the macroblock crosses the picture border),
 * fill s->block[] either with source pixels (intra) or with the difference
 * against the motion compensated prediction (inter), optionally choose
 * interlaced DCT, quantize, optionally refine / eliminate coefficients and
 * finally call the codec specific macroblock writer.
 *
 * @param s               encoder context
 * @param motion_x        forwarded unchanged to the codec specific writer
 * @param motion_y        forwarded unchanged to the codec specific writer
 * @param mb_block_height rows per macroblock used when addressing the chroma
 *                        planes (encode_mb() passes 8 or 16)
 * @param mb_block_count  number of 8x8 blocks in the macroblock
 *                        (encode_mb() passes 6 or 8)
 */
1724 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1725                                                 int motion_x, int motion_y,
1726                                                 int mb_block_height,
1727                                                 int mb_block_count)
1728 {
1729     int16_t weight[8][64];
1730     DCTELEM orig[8][64];
1731     const int mb_x = s->mb_x;
1732     const int mb_y = s->mb_y;
1733     int i;
1734     int skip_dct[8];
1735     int dct_offset = s->linesize * 8; // default for progressive frames
1736     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1737     int wrap_y, wrap_c;
1738
         /* every block starts from the context wide skipdct setting */
1739     for (i = 0; i < mb_block_count; i++)
1740         skip_dct[i] = s->skipdct;
1741
         /* per macroblock quantizer selection */
1742     if (s->adaptive_quant) {
1743         const int last_qp = s->qscale;
1744         const int mb_xy = mb_x + mb_y * s->mb_stride;
1745
1746         s->lambda = s->lambda_table[mb_xy];
1747         update_qscale(s);
1748
1749         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1750             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1751             s->dquant = s->qscale - last_qp;
1752
1753             if (s->out_format == FMT_H263) {
1754                 s->dquant = av_clip(s->dquant, -2, 2);
1755
1756                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1757                     if (!s->mb_intra) {
1758                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1759                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1760                                 s->dquant = 0;
1761                         }
1762                         if (s->mv_type == MV_TYPE_8X8)
1763                             s->dquant = 0;
1764                     }
1765                 }
1766             }
1767         }
1768         ff_set_qscale(s, last_qp + s->dquant);
1769     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1770         ff_set_qscale(s, s->qscale + s->dquant);
1771
         /* address the source pixels of this macroblock in the input picture */
1772     wrap_y = s->linesize;
1773     wrap_c = s->uvlinesize;
1774     ptr_y  = s->new_picture.f.data[0] +
1775              (mb_y * 16 * wrap_y)              + mb_x * 16;
1776     ptr_cb = s->new_picture.f.data[1] +
1777              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1778     ptr_cr = s->new_picture.f.data[2] +
1779              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1780
         /* the macroblock extends past the right or bottom picture border:
          * redirect all three source pointers into edge_emu_buffer, which is
          * filled through emulated_edge_mc() */
1781     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1782         uint8_t *ebuf = s->edge_emu_buffer + 32;
1783         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1784                                 mb_y * 16, s->width, s->height);
1785         ptr_y = ebuf;
1786         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1787                                 mb_block_height, mb_x * 8, mb_y * 8,
1788                                 s->width >> 1, s->height >> 1);
1789         ptr_cb = ebuf + 18 * wrap_y;
1790         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1791                                 mb_block_height, mb_x * 8, mb_y * 8,
1792                                 s->width >> 1, s->height >> 1);
1793         ptr_cr = ebuf + 18 * wrap_y + 8;
1794     }
1795
1796     if (s->mb_intra) {
             /* intra: the blocks are filled directly from the source pixels */
1797         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1798             int progressive_score, interlaced_score;
1799
1800             s->interlaced_dct = 0;
1801             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1802                                                     NULL, wrap_y, 8) +
1803                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1804                                                     NULL, wrap_y, 8) - 400;
1805
1806             if (progressive_score > 0) {
1807                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1808                                                        NULL, wrap_y * 2, 8) +
1809                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1810                                                        NULL, wrap_y * 2, 8);
1811                 if (progressive_score > interlaced_score) {
1812                     s->interlaced_dct = 1;
1813
                         /* from here on the lower luma blocks start one line
                          * down and every block reads every second line */
1814                     dct_offset = wrap_y;
1815                     wrap_y <<= 1;
1816                     if (s->chroma_format == CHROMA_422)
1817                         wrap_c <<= 1;
1818                 }
1819             }
1820         }
1821
1822         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1823         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1824         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1825         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1826
1827         if (s->flags & CODEC_FLAG_GRAY) {
1828             skip_dct[4] = 1;
1829             skip_dct[5] = 1;
1830         } else {
1831             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1832             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1833             if (!s->chroma_y_shift) { /* 422 */
1834                 s->dsp.get_pixels(s->block[6],
1835                                   ptr_cb + (dct_offset >> 1), wrap_c);
1836                 s->dsp.get_pixels(s->block[7],
1837                                   ptr_cr + (dct_offset >> 1), wrap_c);
1838             }
1839         }
1840     } else {
             /* inter: build the prediction in s->dest[] and code the
              * difference between source and prediction */
1841         op_pixels_func (*op_pix)[4];
1842         qpel_mc_func (*op_qpix)[16];
1843         uint8_t *dest_y, *dest_cb, *dest_cr;
1844
1845         dest_y  = s->dest[0];
1846         dest_cb = s->dest[1];
1847         dest_cr = s->dest[2];
1848
1849         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1850             op_pix  = s->dsp.put_pixels_tab;
1851             op_qpix = s->dsp.put_qpel_pixels_tab;
1852         } else {
1853             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1854             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1855         }
1856
1857         if (s->mv_dir & MV_DIR_FORWARD) {
1858             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1859                           s->last_picture.f.data,
1860                           op_pix, op_qpix);
                 /* a following backward prediction uses the avg tables */
1861             op_pix  = s->dsp.avg_pixels_tab;
1862             op_qpix = s->dsp.avg_qpel_pixels_tab;
1863         }
1864         if (s->mv_dir & MV_DIR_BACKWARD) {
1865             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1866                           s->next_picture.f.data,
1867                           op_pix, op_qpix);
1868         }
1869
1870         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1871             int progressive_score, interlaced_score;
1872
1873             s->interlaced_dct = 0;
1874             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1875                                                     ptr_y,              wrap_y,
1876                                                     8) +
1877                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1878                                                     ptr_y + wrap_y * 8, wrap_y,
1879                                                     8) - 400;
1880
1881             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1882                 progressive_score -= 400;
1883
1884             if (progressive_score > 0) {
1885                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1886                                                        ptr_y,
1887                                                        wrap_y * 2, 8) +
1888                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1889                                                        ptr_y + wrap_y,
1890                                                        wrap_y * 2, 8);
1891
1892                 if (progressive_score > interlaced_score) {
1893                     s->interlaced_dct = 1;
1894
1895                     dct_offset = wrap_y;
1896                     wrap_y <<= 1;
1897                     if (s->chroma_format == CHROMA_422)
1898                         wrap_c <<= 1;
1899                 }
1900             }
1901         }
1902
1903         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1904         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1905         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1906                            dest_y + dct_offset, wrap_y);
1907         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1908                            dest_y + dct_offset + 8, wrap_y);
1909
1910         if (s->flags & CODEC_FLAG_GRAY) {
1911             skip_dct[4] = 1;
1912             skip_dct[5] = 1;
1913         } else {
1914             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1915             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1916             if (!s->chroma_y_shift) { /* 422 */
1917                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1918                                    dest_cb + (dct_offset >> 1), wrap_c);
1919                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1920                                    dest_cr + (dct_offset >> 1), wrap_c);
1921             }
1922         }
1923         /* pre quantization */
             /* when the stored mc_mb_var of this macroblock is small relative
              * to qscale, each block whose SAD against the prediction is
              * below 20 * qscale is not transformed at all */
1924         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1925                 2 * s->qscale * s->qscale) {
1926             // FIXME optimize
1927             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1928                               wrap_y, 8) < 20 * s->qscale)
1929                 skip_dct[0] = 1;
1930             if (s->dsp.sad[1](NULL, ptr_y + 8,
1931                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1932                 skip_dct[1] = 1;
1933             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1934                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1935                 skip_dct[2] = 1;
1936             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1937                               dest_y + dct_offset + 8,
1938                               wrap_y, 8) < 20 * s->qscale)
1939                 skip_dct[3] = 1;
1940             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1941                               wrap_c, 8) < 20 * s->qscale)
1942                 skip_dct[4] = 1;
1943             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1944                               wrap_c, 8) < 20 * s->qscale)
1945                 skip_dct[5] = 1;
1946             if (!s->chroma_y_shift) { /* 422 */
1947                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1948                                   dest_cb + (dct_offset >> 1),
1949                                   wrap_c, 8) < 20 * s->qscale)
1950                     skip_dct[6] = 1;
1951                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1952                                   dest_cr + (dct_offset >> 1),
1953                                   wrap_c, 8) < 20 * s->qscale)
1954                     skip_dct[7] = 1;
1955             }
1956         }
1957     }
1958
         /* noise shaping needs a weight table per coded block and a copy of
          * the unquantized blocks; both are consumed by dct_quantize_refine() */
1959     if (s->quantizer_noise_shaping) {
1960         if (!skip_dct[0])
1961             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1962         if (!skip_dct[1])
1963             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1964         if (!skip_dct[2])
1965             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1966         if (!skip_dct[3])
1967             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1968         if (!skip_dct[4])
1969             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1970         if (!skip_dct[5])
1971             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1972         if (!s->chroma_y_shift) { /* 422 */
1973             if (!skip_dct[6])
1974                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1975                                   wrap_c);
1976             if (!skip_dct[7])
1977                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1978                                   wrap_c);
1979         }
1980         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
1981     }
1982
1983     /* DCT & quantize */
1984     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1985     {
1986         for (i = 0; i < mb_block_count; i++) {
1987             if (!skip_dct[i]) {
1988                 int overflow;
1989                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1990                 // FIXME we could decide to change to quantizer instead of
1991                 // clipping
1992                 // JS: I don't think that would be a good idea it could lower
1993                 //     quality instead of improve it. Just INTRADC clipping
1994                 //     deserves changes in quantizer
1995                 if (overflow)
1996                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
1997             } else
1998                 s->block_last_index[i] = -1;
1999         }
2000         if (s->quantizer_noise_shaping) {
2001             for (i = 0; i < mb_block_count; i++) {
2002                 if (!skip_dct[i]) {
2003                     s->block_last_index[i] =
2004                         dct_quantize_refine(s, s->block[i], weight[i],
2005                                             orig[i], i, s->qscale);
2006                 }
2007             }
2008         }
2009
             /* coefficient elimination is only applied to inter macroblocks */
2010         if (s->luma_elim_threshold && !s->mb_intra)
2011             for (i = 0; i < 4; i++)
2012                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2013         if (s->chroma_elim_threshold && !s->mb_intra)
2014             for (i = 4; i < mb_block_count; i++)
2015                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2016
2017         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2018             for (i = 0; i < mb_block_count; i++) {
2019                 if (s->block_last_index[i] == -1)
2020                     s->coded_score[i] = INT_MAX / 256;
2021             }
2022         }
2023     }
2024
         /* gray intra macroblocks: both chroma blocks carry a single fixed
          * DC coefficient derived from c_dc_scale */
2025     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2026         s->block_last_index[4] =
2027         s->block_last_index[5] = 0;
2028         s->block[4][0] =
2029         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2030     }
2031
2032     // non c quantize code returns incorrect block_last_index FIXME
         /* recompute the last non-zero scan position by scanning backwards */
2033     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2034         for (i = 0; i < mb_block_count; i++) {
2035             int j;
2036             if (s->block_last_index[i] > 0) {
2037                 for (j = 63; j > 0; j--) {
2038                     if (s->block[i][s->intra_scantable.permutated[j]])
2039                         break;
2040                 }
2041                 s->block_last_index[i] = j;
2042             }
2043         }
2044     }
2045
2046     /* huffman encode */
2047     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2048     case AV_CODEC_ID_MPEG1VIDEO:
2049     case AV_CODEC_ID_MPEG2VIDEO:
2050         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2051             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2052         break;
2053     case AV_CODEC_ID_MPEG4:
2054         if (CONFIG_MPEG4_ENCODER)
2055             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2056         break;
2057     case AV_CODEC_ID_MSMPEG4V2:
2058     case AV_CODEC_ID_MSMPEG4V3:
2059     case AV_CODEC_ID_WMV1:
2060         if (CONFIG_MSMPEG4_ENCODER)
2061             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2062         break;
2063     case AV_CODEC_ID_WMV2:
2064         if (CONFIG_WMV2_ENCODER)
2065             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2066         break;
2067     case AV_CODEC_ID_H261:
2068         if (CONFIG_H261_ENCODER)
2069             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2070         break;
2071     case AV_CODEC_ID_H263:
2072     case AV_CODEC_ID_H263P:
2073     case AV_CODEC_ID_FLV1:
2074     case AV_CODEC_ID_RV10:
2075     case AV_CODEC_ID_RV20:
2076         if (CONFIG_H263_ENCODER)
2077             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2078         break;
2079     case AV_CODEC_ID_MJPEG:
2080         if (CONFIG_MJPEG_ENCODER)
2081             ff_mjpeg_encode_mb(s, s->block);
2082         break;
2083     default:
2084         assert(0);
2085     }
2086 }
2087
2088 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2089 {
2090     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2091     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2092 }
2093
2094 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2095     int i;
2096
2097     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2098
2099     /* mpeg1 */
2100     d->mb_skip_run= s->mb_skip_run;
2101     for(i=0; i<3; i++)
2102         d->last_dc[i] = s->last_dc[i];
2103
2104     /* statistics */
2105     d->mv_bits= s->mv_bits;
2106     d->i_tex_bits= s->i_tex_bits;
2107     d->p_tex_bits= s->p_tex_bits;
2108     d->i_count= s->i_count;
2109     d->f_count= s->f_count;
2110     d->b_count= s->b_count;
2111     d->skip_count= s->skip_count;
2112     d->misc_bits= s->misc_bits;
2113     d->last_bits= 0;
2114
2115     d->mb_skipped= 0;
2116     d->qscale= s->qscale;
2117     d->dquant= s->dquant;
2118
2119     d->esc3_level_length= s->esc3_level_length;
2120 }
2121
2122 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2123     int i;
2124
2125     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2126     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2127
2128     /* mpeg1 */
2129     d->mb_skip_run= s->mb_skip_run;
2130     for(i=0; i<3; i++)
2131         d->last_dc[i] = s->last_dc[i];
2132
2133     /* statistics */
2134     d->mv_bits= s->mv_bits;
2135     d->i_tex_bits= s->i_tex_bits;
2136     d->p_tex_bits= s->p_tex_bits;
2137     d->i_count= s->i_count;
2138     d->f_count= s->f_count;
2139     d->b_count= s->b_count;
2140     d->skip_count= s->skip_count;
2141     d->misc_bits= s->misc_bits;
2142
2143     d->mb_intra= s->mb_intra;
2144     d->mb_skipped= s->mb_skipped;
2145     d->mv_type= s->mv_type;
2146     d->mv_dir= s->mv_dir;
2147     d->pb= s->pb;
2148     if(s->data_partitioning){
2149         d->pb2= s->pb2;
2150         d->tex_pb= s->tex_pb;
2151     }
2152     d->block= s->block;
2153     for(i=0; i<8; i++)
2154         d->block_last_index[i]= s->block_last_index[i];
2155     d->interlaced_dct= s->interlaced_dct;
2156     d->qscale= s->qscale;
2157
2158     d->esc3_level_length= s->esc3_level_length;
2159 }
2160
2161 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2162                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2163                            int *dmin, int *next_block, int motion_x, int motion_y)
2164 {
2165     int score;
2166     uint8_t *dest_backup[3];
2167
2168     copy_context_before_encode(s, backup, type);
2169
2170     s->block= s->blocks[*next_block];
2171     s->pb= pb[*next_block];
2172     if(s->data_partitioning){
2173         s->pb2   = pb2   [*next_block];
2174         s->tex_pb= tex_pb[*next_block];
2175     }
2176
2177     if(*next_block){
2178         memcpy(dest_backup, s->dest, sizeof(s->dest));
2179         s->dest[0] = s->rd_scratchpad;
2180         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2181         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2182         assert(s->linesize >= 32); //FIXME
2183     }
2184
2185     encode_mb(s, motion_x, motion_y);
2186
2187     score= put_bits_count(&s->pb);
2188     if(s->data_partitioning){
2189         score+= put_bits_count(&s->pb2);
2190         score+= put_bits_count(&s->tex_pb);
2191     }
2192
2193     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2194         ff_MPV_decode_mb(s, s->block);
2195
2196         score *= s->lambda2;
2197         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2198     }
2199
2200     if(*next_block){
2201         memcpy(s->dest, dest_backup, sizeof(s->dest));
2202     }
2203
2204     if(score<*dmin){
2205         *dmin= score;
2206         *next_block^=1;
2207
2208         copy_context_after_encode(best, s, type);
2209     }
2210 }
2211
2212 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2213     uint32_t *sq = ff_squareTbl + 256;
2214     int acc=0;
2215     int x,y;
2216
2217     if(w==16 && h==16)
2218         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2219     else if(w==8 && h==8)
2220         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2221
2222     for(y=0; y<h; y++){
2223         for(x=0; x<w; x++){
2224             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2225         }
2226     }
2227
2228     assert(acc>=0);
2229
2230     return acc;
2231 }
2232
2233 static int sse_mb(MpegEncContext *s){
2234     int w= 16;
2235     int h= 16;
2236
2237     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2238     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2239
2240     if(w==16 && h==16)
2241       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2242         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2243                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2244                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2245       }else{
2246         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2247                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2248                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2249       }
2250     else
2251         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2252                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2253                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2254 }
2255
2256 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2257     MpegEncContext *s= *(void**)arg;
2258
2259
2260     s->me.pre_pass=1;
2261     s->me.dia_size= s->avctx->pre_dia_size;
2262     s->first_slice_line=1;
2263     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2264         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2265             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2266         }
2267         s->first_slice_line=0;
2268     }
2269
2270     s->me.pre_pass=0;
2271
2272     return 0;
2273 }
2274
2275 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2276     MpegEncContext *s= *(void**)arg;
2277
2278     ff_check_alignment();
2279
2280     s->me.dia_size= s->avctx->dia_size;
2281     s->first_slice_line=1;
2282     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2283         s->mb_x=0; //for block init below
2284         ff_init_block_index(s);
2285         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2286             s->block_index[0]+=2;
2287             s->block_index[1]+=2;
2288             s->block_index[2]+=2;
2289             s->block_index[3]+=2;
2290
2291             /* compute motion vector & mb_type and store in context */
2292             if(s->pict_type==AV_PICTURE_TYPE_B)
2293                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2294             else
2295                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2296         }
2297         s->first_slice_line=0;
2298     }
2299     return 0;
2300 }
2301
2302 static int mb_var_thread(AVCodecContext *c, void *arg){
2303     MpegEncContext *s= *(void**)arg;
2304     int mb_x, mb_y;
2305
2306     ff_check_alignment();
2307
2308     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2309         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2310             int xx = mb_x * 16;
2311             int yy = mb_y * 16;
2312             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2313             int varc;
2314             int sum = s->dsp.pix_sum(pix, s->linesize);
2315
2316             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2317
2318             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2319             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2320             s->me.mb_var_sum_temp    += varc;
2321         }
2322     }
2323     return 0;
2324 }
2325
2326 static void write_slice_end(MpegEncContext *s){
2327     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2328         if(s->partitioned_frame){
2329             ff_mpeg4_merge_partitions(s);
2330         }
2331
2332         ff_mpeg4_stuffing(&s->pb);
2333     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2334         ff_mjpeg_encode_stuffing(&s->pb);
2335     }
2336
2337     avpriv_align_put_bits(&s->pb);
2338     flush_put_bits(&s->pb);
2339
2340     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2341         s->misc_bits+= get_bits_diff(s);
2342 }
2343
2344 static void write_mb_info(MpegEncContext *s)
2345 {
2346     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2347     int offset = put_bits_count(&s->pb);
2348     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2349     int gobn = s->mb_y / s->gob_index;
2350     int pred_x, pred_y;
2351     if (CONFIG_H263_ENCODER)
2352         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2353     bytestream_put_le32(&ptr, offset);
2354     bytestream_put_byte(&ptr, s->qscale);
2355     bytestream_put_byte(&ptr, gobn);
2356     bytestream_put_le16(&ptr, mba);
2357     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2358     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2359     /* 4MV not implemented */
2360     bytestream_put_byte(&ptr, 0); /* hmv2 */
2361     bytestream_put_byte(&ptr, 0); /* vmv2 */
2362 }
2363
2364 static void update_mb_info(MpegEncContext *s, int startcode)
2365 {
2366     if (!s->mb_info)
2367         return;
2368     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2369         s->mb_info_size += 12;
2370         s->prev_mb_info = s->last_mb_info;
2371     }
2372     if (startcode) {
2373         s->prev_mb_info = put_bits_count(&s->pb)/8;
2374         /* This might have incremented mb_info_size above, and we return without
2375          * actually writing any info into that slot yet. But in that case,
2376          * this will be called again at the start of the after writing the
2377          * start code, actually writing the mb info. */
2378         return;
2379     }
2380
2381     s->last_mb_info = put_bits_count(&s->pb)/8;
2382     if (!s->mb_info_size)
2383         s->mb_info_size += 12;
2384     write_mb_info(s);
2385 }
2386
2387 static int encode_thread(AVCodecContext *c, void *arg){
2388     MpegEncContext *s= *(void**)arg;
2389     int mb_x, mb_y, pdif = 0;
2390     int chr_h= 16>>s->chroma_y_shift;
2391     int i, j;
2392     MpegEncContext best_s, backup_s;
2393     uint8_t bit_buf[2][MAX_MB_BYTES];
2394     uint8_t bit_buf2[2][MAX_MB_BYTES];
2395     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2396     PutBitContext pb[2], pb2[2], tex_pb[2];
2397
2398     ff_check_alignment();
2399
2400     for(i=0; i<2; i++){
2401         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2402         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2403         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2404     }
2405
2406     s->last_bits= put_bits_count(&s->pb);
2407     s->mv_bits=0;
2408     s->misc_bits=0;
2409     s->i_tex_bits=0;
2410     s->p_tex_bits=0;
2411     s->i_count=0;
2412     s->f_count=0;
2413     s->b_count=0;
2414     s->skip_count=0;
2415
2416     for(i=0; i<3; i++){
2417         /* init last dc values */
2418         /* note: quant matrix value (8) is implied here */
2419         s->last_dc[i] = 128 << s->intra_dc_precision;
2420
2421         s->current_picture.f.error[i] = 0;
2422     }
2423     s->mb_skip_run = 0;
2424     memset(s->last_mv, 0, sizeof(s->last_mv));
2425
2426     s->last_mv_dir = 0;
2427
2428     switch(s->codec_id){
2429     case AV_CODEC_ID_H263:
2430     case AV_CODEC_ID_H263P:
2431     case AV_CODEC_ID_FLV1:
2432         if (CONFIG_H263_ENCODER)
2433             s->gob_index = ff_h263_get_gob_height(s);
2434         break;
2435     case AV_CODEC_ID_MPEG4:
2436         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2437             ff_mpeg4_init_partitions(s);
2438         break;
2439     }
2440
2441     s->resync_mb_x=0;
2442     s->resync_mb_y=0;
2443     s->first_slice_line = 1;
2444     s->ptr_lastgob = s->pb.buf;
2445     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2446         s->mb_x=0;
2447         s->mb_y= mb_y;
2448
2449         ff_set_qscale(s, s->qscale);
2450         ff_init_block_index(s);
2451
2452         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2453             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2454             int mb_type= s->mb_type[xy];
2455 //            int d;
2456             int dmin= INT_MAX;
2457             int dir;
2458
2459             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2460                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2461                 return -1;
2462             }
2463             if(s->data_partitioning){
2464                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2465                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2466                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2467                     return -1;
2468                 }
2469             }
2470
2471             s->mb_x = mb_x;
2472             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2473             ff_update_block_index(s);
2474
2475             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2476                 ff_h261_reorder_mb_index(s);
2477                 xy= s->mb_y*s->mb_stride + s->mb_x;
2478                 mb_type= s->mb_type[xy];
2479             }
2480
2481             /* write gob / video packet header  */
2482             if(s->rtp_mode){
2483                 int current_packet_size, is_gob_start;
2484
2485                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2486
2487                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2488
2489                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2490
2491                 switch(s->codec_id){
2492                 case AV_CODEC_ID_H263:
2493                 case AV_CODEC_ID_H263P:
2494                     if(!s->h263_slice_structured)
2495                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2496                     break;
2497                 case AV_CODEC_ID_MPEG2VIDEO:
2498                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2499                 case AV_CODEC_ID_MPEG1VIDEO:
2500                     if(s->mb_skip_run) is_gob_start=0;
2501                     break;
2502                 }
2503
2504                 if(is_gob_start){
2505                     if(s->start_mb_y != mb_y || mb_x!=0){
2506                         write_slice_end(s);
2507
2508                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2509                             ff_mpeg4_init_partitions(s);
2510                         }
2511                     }
2512
2513                     assert((put_bits_count(&s->pb)&7) == 0);
2514                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2515
2516                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2517                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2518                         int d= 100 / s->avctx->error_rate;
2519                         if(r % d == 0){
2520                             current_packet_size=0;
2521                             s->pb.buf_ptr= s->ptr_lastgob;
2522                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2523                         }
2524                     }
2525
2526                     if (s->avctx->rtp_callback){
2527                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2528                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2529                     }
2530                     update_mb_info(s, 1);
2531
2532                     switch(s->codec_id){
2533                     case AV_CODEC_ID_MPEG4:
2534                         if (CONFIG_MPEG4_ENCODER) {
2535                             ff_mpeg4_encode_video_packet_header(s);
2536                             ff_mpeg4_clean_buffers(s);
2537                         }
2538                     break;
2539                     case AV_CODEC_ID_MPEG1VIDEO:
2540                     case AV_CODEC_ID_MPEG2VIDEO:
2541                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2542                             ff_mpeg1_encode_slice_header(s);
2543                             ff_mpeg1_clean_buffers(s);
2544                         }
2545                     break;
2546                     case AV_CODEC_ID_H263:
2547                     case AV_CODEC_ID_H263P:
2548                         if (CONFIG_H263_ENCODER)
2549                             ff_h263_encode_gob_header(s, mb_y);
2550                     break;
2551                     }
2552
2553                     if(s->flags&CODEC_FLAG_PASS1){
2554                         int bits= put_bits_count(&s->pb);
2555                         s->misc_bits+= bits - s->last_bits;
2556                         s->last_bits= bits;
2557                     }
2558
2559                     s->ptr_lastgob += current_packet_size;
2560                     s->first_slice_line=1;
2561                     s->resync_mb_x=mb_x;
2562                     s->resync_mb_y=mb_y;
2563                 }
2564             }
2565
2566             if(  (s->resync_mb_x   == s->mb_x)
2567                && s->resync_mb_y+1 == s->mb_y){
2568                 s->first_slice_line=0;
2569             }
2570
2571             s->mb_skipped=0;
2572             s->dquant=0; //only for QP_RD
2573
2574             update_mb_info(s, 0);
2575
2576             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2577                 int next_block=0;
2578                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2579
2580                 copy_context_before_encode(&backup_s, s, -1);
2581                 backup_s.pb= s->pb;
2582                 best_s.data_partitioning= s->data_partitioning;
2583                 best_s.partitioned_frame= s->partitioned_frame;
2584                 if(s->data_partitioning){
2585                     backup_s.pb2= s->pb2;
2586                     backup_s.tex_pb= s->tex_pb;
2587                 }
2588
2589                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2590                     s->mv_dir = MV_DIR_FORWARD;
2591                     s->mv_type = MV_TYPE_16X16;
2592                     s->mb_intra= 0;
2593                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2594                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2595                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2596                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2597                 }
2598                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2599                     s->mv_dir = MV_DIR_FORWARD;
2600                     s->mv_type = MV_TYPE_FIELD;
2601                     s->mb_intra= 0;
2602                     for(i=0; i<2; i++){
2603                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2604                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2605                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2606                     }
2607                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2608                                  &dmin, &next_block, 0, 0);
2609                 }
2610                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2611                     s->mv_dir = MV_DIR_FORWARD;
2612                     s->mv_type = MV_TYPE_16X16;
2613                     s->mb_intra= 0;
2614                     s->mv[0][0][0] = 0;
2615                     s->mv[0][0][1] = 0;
2616                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2617                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2618                 }
2619                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2620                     s->mv_dir = MV_DIR_FORWARD;
2621                     s->mv_type = MV_TYPE_8X8;
2622                     s->mb_intra= 0;
2623                     for(i=0; i<4; i++){
2624                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2625                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2626                     }
2627                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2628                                  &dmin, &next_block, 0, 0);
2629                 }
2630                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2631                     s->mv_dir = MV_DIR_FORWARD;
2632                     s->mv_type = MV_TYPE_16X16;
2633                     s->mb_intra= 0;
2634                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2635                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2636                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2637                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2638                 }
2639                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2640                     s->mv_dir = MV_DIR_BACKWARD;
2641                     s->mv_type = MV_TYPE_16X16;
2642                     s->mb_intra= 0;
2643                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2644                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2645                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2646                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2647                 }
2648                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2649                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2650                     s->mv_type = MV_TYPE_16X16;
2651                     s->mb_intra= 0;
2652                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2653                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2654                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2655                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2656                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2657                                  &dmin, &next_block, 0, 0);
2658                 }
2659                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2660                     s->mv_dir = MV_DIR_FORWARD;
2661                     s->mv_type = MV_TYPE_FIELD;
2662                     s->mb_intra= 0;
2663                     for(i=0; i<2; i++){
2664                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2665                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2666                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2667                     }
2668                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2669                                  &dmin, &next_block, 0, 0);
2670                 }
2671                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2672                     s->mv_dir = MV_DIR_BACKWARD;
2673                     s->mv_type = MV_TYPE_FIELD;
2674                     s->mb_intra= 0;
2675                     for(i=0; i<2; i++){
2676                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2677                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2678                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2679                     }
2680                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2681                                  &dmin, &next_block, 0, 0);
2682                 }
2683                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2684                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2685                     s->mv_type = MV_TYPE_FIELD;
2686                     s->mb_intra= 0;
2687                     for(dir=0; dir<2; dir++){
2688                         for(i=0; i<2; i++){
2689                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2690                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2691                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2692                         }
2693                     }
2694                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2695                                  &dmin, &next_block, 0, 0);
2696                 }
2697                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2698                     s->mv_dir = 0;
2699                     s->mv_type = MV_TYPE_16X16;
2700                     s->mb_intra= 1;
2701                     s->mv[0][0][0] = 0;
2702                     s->mv[0][0][1] = 0;
2703                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2704                                  &dmin, &next_block, 0, 0);
2705                     if(s->h263_pred || s->h263_aic){
2706                         if(best_s.mb_intra)
2707                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2708                         else
2709                             ff_clean_intra_table_entries(s); //old mode?
2710                     }
2711                 }
2712
2713                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2714                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2715                         const int last_qp= backup_s.qscale;
2716                         int qpi, qp, dc[6];
2717                         DCTELEM ac[6][16];
2718                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2719                         static const int dquant_tab[4]={-1,1,-2,2};
2720
2721                         assert(backup_s.dquant == 0);
2722
2723                         //FIXME intra
2724                         s->mv_dir= best_s.mv_dir;
2725                         s->mv_type = MV_TYPE_16X16;
2726                         s->mb_intra= best_s.mb_intra;
2727                         s->mv[0][0][0] = best_s.mv[0][0][0];
2728                         s->mv[0][0][1] = best_s.mv[0][0][1];
2729                         s->mv[1][0][0] = best_s.mv[1][0][0];
2730                         s->mv[1][0][1] = best_s.mv[1][0][1];
2731
2732                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2733                         for(; qpi<4; qpi++){
2734                             int dquant= dquant_tab[qpi];
2735                             qp= last_qp + dquant;
2736                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2737                                 continue;
2738                             backup_s.dquant= dquant;
2739                             if(s->mb_intra && s->dc_val[0]){
2740                                 for(i=0; i<6; i++){
2741                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2742                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2743                                 }
2744                             }
2745
2746                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2747                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2748                             if(best_s.qscale != qp){
2749                                 if(s->mb_intra && s->dc_val[0]){
2750                                     for(i=0; i<6; i++){
2751                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2752                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2753                                     }
2754                                 }
2755                             }
2756                         }
2757                     }
2758                 }
2759                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2760                     int mx= s->b_direct_mv_table[xy][0];
2761                     int my= s->b_direct_mv_table[xy][1];
2762
2763                     backup_s.dquant = 0;
2764                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2765                     s->mb_intra= 0;
2766                     ff_mpeg4_set_direct_mv(s, mx, my);
2767                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2768                                  &dmin, &next_block, mx, my);
2769                 }
2770                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2771                     backup_s.dquant = 0;
2772                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2773                     s->mb_intra= 0;
2774                     ff_mpeg4_set_direct_mv(s, 0, 0);
2775                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2776                                  &dmin, &next_block, 0, 0);
2777                 }
2778                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2779                     int coded=0;
2780                     for(i=0; i<6; i++)
2781                         coded |= s->block_last_index[i];
2782                     if(coded){
2783                         int mx,my;
2784                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2785                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2786                             mx=my=0; //FIXME find the one we actually used
2787                             ff_mpeg4_set_direct_mv(s, mx, my);
2788                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2789                             mx= s->mv[1][0][0];
2790                             my= s->mv[1][0][1];
2791                         }else{
2792                             mx= s->mv[0][0][0];
2793                             my= s->mv[0][0][1];
2794                         }
2795
2796                         s->mv_dir= best_s.mv_dir;
2797                         s->mv_type = best_s.mv_type;
2798                         s->mb_intra= 0;
2799 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2800                         s->mv[0][0][1] = best_s.mv[0][0][1];
2801                         s->mv[1][0][0] = best_s.mv[1][0][0];
2802                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2803                         backup_s.dquant= 0;
2804                         s->skipdct=1;
2805                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2806                                         &dmin, &next_block, mx, my);
2807                         s->skipdct=0;
2808                     }
2809                 }
2810
2811                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2812
2813                 copy_context_after_encode(s, &best_s, -1);
2814
2815                 pb_bits_count= put_bits_count(&s->pb);
2816                 flush_put_bits(&s->pb);
2817                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2818                 s->pb= backup_s.pb;
2819
2820                 if(s->data_partitioning){
2821                     pb2_bits_count= put_bits_count(&s->pb2);
2822                     flush_put_bits(&s->pb2);
2823                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2824                     s->pb2= backup_s.pb2;
2825
2826                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2827                     flush_put_bits(&s->tex_pb);
2828                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2829                     s->tex_pb= backup_s.tex_pb;
2830                 }
2831                 s->last_bits= put_bits_count(&s->pb);
2832
2833                 if (CONFIG_H263_ENCODER &&
2834                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2835                     ff_h263_update_motion_val(s);
2836
2837                 if(next_block==0){ //FIXME 16 vs linesize16
2838                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2839                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2840                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2841                 }
2842
2843                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2844                     ff_MPV_decode_mb(s, s->block);
2845             } else {
2846                 int motion_x = 0, motion_y = 0;
2847                 s->mv_type=MV_TYPE_16X16;
2848                 // only one MB-Type possible
2849
2850                 switch(mb_type){
2851                 case CANDIDATE_MB_TYPE_INTRA:
2852                     s->mv_dir = 0;
2853                     s->mb_intra= 1;
2854                     motion_x= s->mv[0][0][0] = 0;
2855                     motion_y= s->mv[0][0][1] = 0;
2856                     break;
2857                 case CANDIDATE_MB_TYPE_INTER:
2858                     s->mv_dir = MV_DIR_FORWARD;
2859                     s->mb_intra= 0;
2860                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2861                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2862                     break;
2863                 case CANDIDATE_MB_TYPE_INTER_I:
2864                     s->mv_dir = MV_DIR_FORWARD;
2865                     s->mv_type = MV_TYPE_FIELD;
2866                     s->mb_intra= 0;
2867                     for(i=0; i<2; i++){
2868                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2869                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2870                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2871                     }
2872                     break;
2873                 case CANDIDATE_MB_TYPE_INTER4V:
2874                     s->mv_dir = MV_DIR_FORWARD;
2875                     s->mv_type = MV_TYPE_8X8;
2876                     s->mb_intra= 0;
2877                     for(i=0; i<4; i++){
2878                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2879                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2880                     }
2881                     break;
2882                 case CANDIDATE_MB_TYPE_DIRECT:
2883                     if (CONFIG_MPEG4_ENCODER) {
2884                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2885                         s->mb_intra= 0;
2886                         motion_x=s->b_direct_mv_table[xy][0];
2887                         motion_y=s->b_direct_mv_table[xy][1];
2888                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2889                     }
2890                     break;
2891                 case CANDIDATE_MB_TYPE_DIRECT0:
2892                     if (CONFIG_MPEG4_ENCODER) {
2893                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2894                         s->mb_intra= 0;
2895                         ff_mpeg4_set_direct_mv(s, 0, 0);
2896                     }
2897                     break;
2898                 case CANDIDATE_MB_TYPE_BIDIR:
2899                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2900                     s->mb_intra= 0;
2901                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2902                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2903                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2904                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2905                     break;
2906                 case CANDIDATE_MB_TYPE_BACKWARD:
2907                     s->mv_dir = MV_DIR_BACKWARD;
2908                     s->mb_intra= 0;
2909                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2910                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2911                     break;
2912                 case CANDIDATE_MB_TYPE_FORWARD:
2913                     s->mv_dir = MV_DIR_FORWARD;
2914                     s->mb_intra= 0;
2915                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2916                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2917                     break;
2918                 case CANDIDATE_MB_TYPE_FORWARD_I:
2919                     s->mv_dir = MV_DIR_FORWARD;
2920                     s->mv_type = MV_TYPE_FIELD;
2921                     s->mb_intra= 0;
2922                     for(i=0; i<2; i++){
2923                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2924                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2925                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2926                     }
2927                     break;
2928                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2929                     s->mv_dir = MV_DIR_BACKWARD;
2930                     s->mv_type = MV_TYPE_FIELD;
2931                     s->mb_intra= 0;
2932                     for(i=0; i<2; i++){
2933                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2934                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2935                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2936                     }
2937                     break;
2938                 case CANDIDATE_MB_TYPE_BIDIR_I:
2939                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2940                     s->mv_type = MV_TYPE_FIELD;
2941                     s->mb_intra= 0;
2942                     for(dir=0; dir<2; dir++){
2943                         for(i=0; i<2; i++){
2944                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2945                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2946                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2947                         }
2948                     }
2949                     break;
2950                 default:
2951                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2952                 }
2953
2954                 encode_mb(s, motion_x, motion_y);
2955
2956                 // RAL: Update last macroblock type
2957                 s->last_mv_dir = s->mv_dir;
2958
2959                 if (CONFIG_H263_ENCODER &&
2960                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2961                     ff_h263_update_motion_val(s);
2962
2963                 ff_MPV_decode_mb(s, s->block);
2964             }
2965
2966             /* clean the MV table in IPS frames for direct mode in B frames */
2967             if(s->mb_intra /* && I,P,S_TYPE */){
2968                 s->p_mv_table[xy][0]=0;
2969                 s->p_mv_table[xy][1]=0;
2970             }
2971
2972             if(s->flags&CODEC_FLAG_PSNR){
2973                 int w= 16;
2974                 int h= 16;
2975
2976                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2977                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2978
2979                 s->current_picture.f.error[0] += sse(
2980                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2981                     s->dest[0], w, h, s->linesize);
2982                 s->current_picture.f.error[1] += sse(
2983                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2984                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2985                 s->current_picture.f.error[2] += sse(
2986                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2987                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2988             }
2989             if(s->loop_filter){
2990                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2991                     ff_h263_loop_filter(s);
2992             }
2993             av_dlog(s->avctx, "MB %d %d bits\n",
2994                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
2995         }
2996     }
2997
2998     //not beautiful here but we must write it before flushing so it has to be here
2999     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3000         ff_msmpeg4_encode_ext_header(s);
3001
3002     write_slice_end(s);
3003
3004     /* Send the last GOB if RTP */
3005     if (s->avctx->rtp_callback) {
3006         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3007         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3008         /* Call the RTP callback to send the last GOB */
3009         emms_c();
3010         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3011     }
3012
3013     return 0;
3014 }
3015
/*
 * Add src->field to dst->field, then clear src->field.
 * Expects pointers named `dst` and `src` to be in scope at the call site.
 * The expansion is wrapped in do { } while (0) so that one invocation is a
 * single statement, even as the body of an unbraced if/else/for.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3017 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3018     MERGE(me.scene_change_score);
3019     MERGE(me.mc_mb_var_sum_temp);
3020     MERGE(me.mb_var_sum_temp);
3021 }
3022
3023 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3024     int i;
3025
3026     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3027     MERGE(dct_count[1]);
3028     MERGE(mv_bits);
3029     MERGE(i_tex_bits);
3030     MERGE(p_tex_bits);
3031     MERGE(i_count);
3032     MERGE(f_count);
3033     MERGE(b_count);
3034     MERGE(skip_count);
3035     MERGE(misc_bits);
3036     MERGE(error_count);
3037     MERGE(padding_bug_score);
3038     MERGE(current_picture.f.error[0]);
3039     MERGE(current_picture.f.error[1]);
3040     MERGE(current_picture.f.error[2]);
3041
3042     if(dst->avctx->noise_reduction){
3043         for(i=0; i<64; i++){
3044             MERGE(dct_error_sum[0][i]);
3045             MERGE(dct_error_sum[1][i]);
3046         }
3047     }
3048
3049     assert(put_bits_count(&src->pb) % 8 ==0);
3050     assert(put_bits_count(&dst->pb) % 8 ==0);
3051     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3052     flush_put_bits(&dst->pb);
3053 }
3054
3055 static int estimate_qp(MpegEncContext *s, int dry_run){
3056     if (s->next_lambda){
3057         s->current_picture_ptr->f.quality =
3058         s->current_picture.f.quality = s->next_lambda;
3059         if(!dry_run) s->next_lambda= 0;
3060     } else if (!s->fixed_qscale) {
3061         s->current_picture_ptr->f.quality =
3062         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3063         if (s->current_picture.f.quality < 0)
3064             return -1;
3065     }
3066
3067     if(s->adaptive_quant){
3068         switch(s->codec_id){
3069         case AV_CODEC_ID_MPEG4:
3070             if (CONFIG_MPEG4_ENCODER)
3071                 ff_clean_mpeg4_qscales(s);
3072             break;
3073         case AV_CODEC_ID_H263:
3074         case AV_CODEC_ID_H263P:
3075         case AV_CODEC_ID_FLV1:
3076             if (CONFIG_H263_ENCODER)
3077                 ff_clean_h263_qscales(s);
3078             break;
3079         default:
3080             ff_init_qscale_tab(s);
3081         }
3082
3083         s->lambda= s->lambda_table[0];
3084         //FIXME broken
3085     }else
3086         s->lambda = s->current_picture.f.quality;
3087     update_qscale(s);
3088     return 0;
3089 }
3090
3091 /* must be called before writing the header */
3092 static void set_frame_distances(MpegEncContext * s){
3093     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3094     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3095
3096     if(s->pict_type==AV_PICTURE_TYPE_B){
3097         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3098         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3099     }else{
3100         s->pp_time= s->time - s->last_non_b_time;
3101         s->last_non_b_time= s->time;
3102         assert(s->picture_number==0 || s->pp_time > 0);
3103     }
3104 }
3105
/**
 * Encode one picture using the main context s and its slice contexts.
 *
 * Steps, in order: set up timing and rounding state, pick a preliminary
 * lambda, run motion estimation (or, for I pictures, mark every macroblock
 * as an intra candidate), choose f_code/b_code and clip over-long vectors,
 * estimate the final quantizer, write the format specific picture header
 * and finally run encode_thread over all slice contexts and merge their
 * results back into s.
 *
 * @param s              main encoder context
 * @param picture_number stored in s->picture_number and passed on to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails
 */
3106 static int encode_picture(MpegEncContext *s, int picture_number)
3107 {
3108     int i;
3109     int bits;
3110     int context_count = s->slice_context_count;
3111
3112     s->picture_number = picture_number;
3113
3114     /* Reset the average MB variance */
3115     s->me.mb_var_sum_temp    =
3116     s->me.mc_mb_var_sum_temp = 0;
3117
3118     /* we need to initialize some time vars before we can encode b-frames */
3119     // RAL: Condition added for MPEG1VIDEO
3120     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3121         set_frame_distances(s);
3122     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3123         ff_set_mpeg4_time(s);
3124
3125     s->me.scene_change_score=0;
3126
3127 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3128
     /* I pictures set the rounding mode from the msmpeg4 version; other non-B
      * pictures toggle it for flip-flop rounding, H.263+ and MPEG-4 */
3129     if(s->pict_type==AV_PICTURE_TYPE_I){
3130         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3131         else                        s->no_rounding=0;
3132     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3133         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3134             s->no_rounding ^= 1;
3135     }
3136
     /* preliminary lambda for motion estimation: a dry-run estimate in the
      * second pass, otherwise (unless CODEC_FLAG_QSCALE is set) the lambda
      * last used for this kind of picture */
3137     if(s->flags & CODEC_FLAG_PASS2){
3138         if (estimate_qp(s,1) < 0)
3139             return -1;
3140         ff_get_2pass_fcode(s);
3141     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3142         if(s->pict_type==AV_PICTURE_TYPE_B)
3143             s->lambda= s->last_lambda_for[s->pict_type];
3144         else
3145             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3146         update_qscale(s);
3147     }
3148
3149     s->mb_intra=0; //for the rate distortion & bit compare functions
     /* update the additional slice contexts (index 1 and up) from s */
3150     for(i=1; i<context_count; i++){
3151         ff_update_duplicate_context(s->thread_context[i], s);
3152     }
3153
3154     if(ff_init_me(s)<0)
3155         return -1;
3156
3157     /* Estimate motion for every MB */
3158     if(s->pict_type != AV_PICTURE_TYPE_I){
         /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3159         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3160         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3161         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3162             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3163                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3164             }
3165         }
3166
3167         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3168     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3169         /* I-Frame */
3170         for(i=0; i<s->mb_stride*s->mb_height; i++)
3171             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3172
3173         if(!s->fixed_qscale){
3174             /* finding spatial complexity for I-frame rate control */
3175             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3176         }
3177     }
     /* collect the motion estimation statistics of the other contexts in s */
3178     for(i=1; i<context_count; i++){
3179         merge_context_after_me(s, s->thread_context[i]);
3180     }
3181     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3182     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3183     emms_c();
3184
     /* a P picture whose scene change score exceeds the threshold is
      * re-typed as an I picture with all macroblocks as intra candidates */
3185     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3186         s->pict_type= AV_PICTURE_TYPE_I;
3187         for(i=0; i<s->mb_stride*s->mb_height; i++)
3188             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3189         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3190                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3191     }
3192
     /* f_code/b_code selection and vector clipping; skipped when umvplus
      * is set */
3193     if(!s->umvplus){
3194         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3195             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3196
3197             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3198                 int a,b;
3199                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3200                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3201                 s->f_code= FFMAX3(s->f_code, a, b);
3202             }
3203
3204             ff_fix_long_p_mvs(s);
3205             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3206             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3207                 int j;
3208                 for(i=0; i<2; i++){
3209                     for(j=0; j<2; j++)
3210                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3211                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3212                 }
3213             }
3214         }
3215
3216         if(s->pict_type==AV_PICTURE_TYPE_B){
3217             int a, b;
3218
             /* f_code covers the forward tables, b_code the backward ones */
3219             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3220             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3221             s->f_code = FFMAX(a, b);
3222
3223             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3224             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3225             s->b_code = FFMAX(a, b);
3226
3227             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3228             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3229             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3230             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3231             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3232                 int dir, j;
3233                 for(dir=0; dir<2; dir++){
3234                     for(i=0; i<2; i++){
3235                         for(j=0; j<2; j++){
3236                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3237                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3238                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3239                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3240                         }
3241                     }
3242                 }
3243             }
3244         }
3245     }
3246
     /* final (non dry-run) quantizer estimate for this picture */
3247     if (estimate_qp(s, 0) < 0)
3248         return -1;
3249
3250     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3251         s->qscale= 3; //reduce clipping problems
3252
3253     if (s->out_format == FMT_MJPEG) {
3254         /* for mjpeg, we do include qscale in the matrix */
3255         for(i=1;i<64;i++){
3256             int j= s->dsp.idct_permutation[i];
3257
3258             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3259         }
3260         s->y_dc_scale_table=
3261         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3262         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3263         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3264                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
         /* qscale is already folded into the matrix above, so it is fixed to 8 */
3265         s->qscale= 8;
3266     }
3267
3268     //FIXME var duplication
3269     s->current_picture_ptr->f.key_frame =
3270     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3271     s->current_picture_ptr->f.pict_type =
3272     s->current_picture.f.pict_type = s->pict_type;
3273
3274     if (s->current_picture.f.key_frame)
3275         s->picture_in_gop_number=0;
3276
     /* write the picture header of the output format; header_bits is the
      * number of bits this added to s->pb */
3277     s->last_bits= put_bits_count(&s->pb);
3278     switch(s->out_format) {
3279     case FMT_MJPEG:
3280         if (CONFIG_MJPEG_ENCODER)
3281             ff_mjpeg_encode_picture_header(s);
3282         break;
3283     case FMT_H261:
3284         if (CONFIG_H261_ENCODER)
3285             ff_h261_encode_picture_header(s, picture_number);
3286         break;
3287     case FMT_H263:
3288         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3289             ff_wmv2_encode_picture_header(s, picture_number);
3290         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3291             ff_msmpeg4_encode_picture_header(s, picture_number);
3292         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3293             ff_mpeg4_encode_picture_header(s, picture_number);
3294         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3295             ff_rv10_encode_picture_header(s, picture_number);
3296         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3297             ff_rv20_encode_picture_header(s, picture_number);
3298         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3299             ff_flv_encode_picture_header(s, picture_number);
3300         else if (CONFIG_H263_ENCODER)
3301             ff_h263_encode_picture_header(s, picture_number);
3302         break;
3303     case FMT_MPEG1:
3304         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3305             ff_mpeg1_encode_picture_header(s, picture_number);
3306         break;
3307     case FMT_H264:
3308         break;
3309     default:
3310         assert(0);
3311     }
3312     bits= put_bits_count(&s->pb);
3313     s->header_bits= bits - s->last_bits;
3314
     /* update the other slice contexts from s, encode all of them, then
      * merge their statistics and bitstreams back into s */
3315     for(i=1; i<context_count; i++){
3316         update_duplicate_context_after_me(s->thread_context[i], s);
3317     }
3318     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3319     for(i=1; i<context_count; i++){
3320         merge_context_after_encode(s, s->thread_context[i]);
3321     }
3322     emms_c();
3323     return 0;
3324 }
3325
3326 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3327     const int intra= s->mb_intra;
3328     int i;
3329
3330     s->dct_count[intra]++;
3331
3332     for(i=0; i<64; i++){
3333         int level= block[i];
3334
3335         if(level){
3336             if(level>0){
3337                 s->dct_error_sum[intra][i] += level;
3338                 level -= s->dct_offset[intra][i];
3339                 if(level<0) level=0;
3340             }else{
3341                 s->dct_error_sum[intra][i] -= level;
3342                 level += s->dct_offset[intra][i];
3343                 if(level>0) level=0;
3344             }
3345             block[i]= level;
3346         }
3347     }
3348 }
3349
/**
 * Forward-transform and quantize one block, choosing the quantized levels
 * with a rate-distortion search over the scan positions.
 *
 * @param s        encoder context
 * @param block    input samples; transformed in place by s->dsp.fdct and
 *                 overwritten with the selected quantized levels
 * @param n        block index; n < 4 selects y_dc_scale, otherwise
 *                 c_dc_scale, for the intra DC coefficient, and it indexes
 *                 s->coded_score
 * @param qscale   quantizer; selects the row of q_intra_matrix/q_inter_matrix
 * @param overflow set to nonzero when the OR of the candidate levels is
 *                 larger than s->max_qcoeff (overflow might have happened)
 * @return scan index of the last nonzero coefficient; a value below the
 *         first coded position (-1 for inter blocks) if nothing is coded
 */
3350 static int dct_quantize_trellis_c(MpegEncContext *s,
3351                                   DCTELEM *block, int n,
3352                                   int qscale, int *overflow){
3353     const int *qmat;
3354     const uint8_t *scantable= s->intra_scantable.scantable;
3355     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3356     int max=0;
3357     unsigned int threshold1, threshold2;
3358     int bias=0;
3359     int run_tab[65];
3360     int level_tab[65];
3361     int score_tab[65];
3362     int survivor[65];
3363     int survivor_count;
3364     int last_run=0;
3365     int last_level=0;
3366     int last_score= 0;
3367     int last_i;
3368     int coeff[2][64];
3369     int coeff_count[64];
3370     int qmul, qadd, start_i, last_non_zero, i, dc;
3371     const int esc_length= s->ac_esc_length;
3372     uint8_t * length;
3373     uint8_t * last_length;
3374     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3375
3376     s->dsp.fdct (block);
3377
3378     if(s->dct_error_sum)
3379         s->denoise_dct(s, block);
3380     qmul= qscale*16;
3381     qadd= ((qscale-1)|1)*8;
3382
3383     if (s->mb_intra) {
3384         int q;
3385         if (!s->h263_aic) {
3386             if (n < 4)
3387                 q = s->y_dc_scale;
3388             else
3389                 q = s->c_dc_scale;
3390             q = q << 3;
3391         } else{
3392             /* For AIC we skip quant/dequant of INTRADC */
3393             q = 1 << 3;
3394             qadd=0;
3395         }
3396
3397         /* note: block[0] is assumed to be positive */
3398         block[0] = (block[0] + (q >> 1)) / q;
         /* the DC coefficient is quantized above; the search starts at 1 */
3399         start_i = 1;
3400         last_non_zero = 0;
3401         qmat = s->q_intra_matrix[qscale];
3402         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3403             bias= 1<<(QMAT_SHIFT-1);
3404         length     = s->intra_ac_vlc_length;
3405         last_length= s->intra_ac_vlc_last_length;
3406     } else {
3407         start_i = 0;
3408         last_non_zero = -1;
3409         qmat = s->q_inter_matrix[qscale];
3410         length     = s->inter_ac_vlc_length;
3411         last_length= s->inter_ac_vlc_last_length;
3412     }
3413     last_i= start_i;
3414
     /* the unsigned compare "(unsigned)(level+threshold1) > threshold2" used
      * below is true exactly when level lies outside [-threshold1, threshold1] */
3415     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3416     threshold2= (threshold1<<1);
3417
     /* scan backwards for the last position outside the threshold */
3418     for(i=63; i>=start_i; i--) {
3419         const int j = scantable[i];
3420         int level = block[j] * qmat[j];
3421
3422         if(((unsigned)(level+threshold1))>threshold2){
3423             last_non_zero = i;
3424             break;
3425         }
3426     }
3427
     /* build the candidate levels per position: the quantized level and the
      * one with magnitude one smaller (dropped when that would be 0);
      * positions inside the threshold get a single +1/-1 candidate */
3428     for(i=start_i; i<=last_non_zero; i++) {
3429         const int j = scantable[i];
3430         int level = block[j] * qmat[j];
3431
3432 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3433 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3434         if(((unsigned)(level+threshold1))>threshold2){
3435             if(level>0){
3436                 level= (bias + level)>>QMAT_SHIFT;
3437                 coeff[0][i]= level;
3438                 coeff[1][i]= level-1;
3439 //                coeff[2][k]= level-2;
3440             }else{
3441                 level= (bias - level)>>QMAT_SHIFT;
3442                 coeff[0][i]= -level;
3443                 coeff[1][i]= -level+1;
3444 //                coeff[2][k]= -level+2;
3445             }
3446             coeff_count[i]= FFMIN(level, 2);
3447             assert(coeff_count[i]);
3448             max |=level;
3449         }else{
             /* (level>>31)|1 is -1 for negative level, +1 otherwise */
3450             coeff[0][i]= (level>>31)|1;
3451             coeff_count[i]= 1;
3452         }
3453     }
3454
3455     *overflow= s->max_qcoeff < max; //overflow might have happened
3456
     /* nothing outside the threshold: clear the coefficients and return */
3457     if(last_non_zero < start_i){
3458         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3459         return last_non_zero;
3460     }
3461
3462     score_tab[start_i]= 0;
3463     survivor[0]= start_i;
3464     survivor_count= 1;
3465
     /* forward pass: for each position try every candidate level against
      * every surviving start position; score_tab[i+1] receives the best score
      * found with a nonzero level at position i, and run_tab/level_tab[i+1]
      * the run and level that achieved it */
3466     for(i=start_i; i<=last_non_zero; i++){
3467         int level_index, j, zero_distortion;
3468         int dct_coeff= FFABS(block[ scantable[i] ]);
3469         int best_score=256*256*256*120;
3470
         /* NOTE(review): presumably undoes the output scaling of the fast
          * (AAN) fdct - confirm against ff_fdct_ifast */
3471         if (s->dsp.fdct == ff_fdct_ifast)
3472             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3473         zero_distortion= dct_coeff*dct_coeff;
3474
3475         for(level_index=0; level_index < coeff_count[i]; level_index++){
3476             int distortion;
3477             int level= coeff[level_index][i];
3478             const int alevel= FFABS(level);
3479             int unquant_coeff;
3480
3481             assert(level);
3482
             /* reconstruct the value this level would dequantize to */
3483             if(s->out_format == FMT_H263){
3484                 unquant_coeff= alevel*qmul + qadd;
3485             }else{ //MPEG1
3486                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3487                 if(s->mb_intra){
3488                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3489                         unquant_coeff =   (unquant_coeff - 1) | 1;
3490                 }else{
3491                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3492                         unquant_coeff =   (unquant_coeff - 1) | 1;
3493                 }
3494                 unquant_coeff<<= 3;
3495             }
3496
             /* distortion relative to coding this coefficient as zero */
3497             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
             /* bias the level by 64; levels in -64..63 use the length tables,
              * everything else is charged the escape length */
3498             level+=64;
3499             if((level&(~127)) == 0){
3500                 for(j=survivor_count-1; j>=0; j--){
3501                     int run= i - survivor[j];
3502                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3503                     score += score_tab[i-run];
3504
3505                     if(score < best_score){
3506                         best_score= score;
3507                         run_tab[i+1]= run;
3508                         level_tab[i+1]= level-64;
3509                     }
3510                 }
3511
                 /* for FMT_H263 the last coefficient has its own length
                  * table, so the best end point is tracked here as well */
3512                 if(s->out_format == FMT_H263){
3513                     for(j=survivor_count-1; j>=0; j--){
3514                         int run= i - survivor[j];
3515                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3516                         score += score_tab[i-run];
3517                         if(score < last_score){
3518                             last_score= score;
3519                             last_run= run;
3520                             last_level= level-64;
3521                             last_i= i+1;
3522                         }
3523                     }
3524                 }
3525             }else{
3526                 distortion += esc_length*lambda;
3527                 for(j=survivor_count-1; j>=0; j--){
3528                     int run= i - survivor[j];
3529                     int score= distortion + score_tab[i-run];
3530
3531                     if(score < best_score){
3532                         best_score= score;
3533                         run_tab[i+1]= run;
3534                         level_tab[i+1]= level-64;
3535                     }
3536                 }
3537
3538                 if(s->out_format == FMT_H263){
3539                   for(j=survivor_count-1; j>=0; j--){
3540                         int run= i - survivor[j];
3541                         int score= distortion + score_tab[i-run];
3542                         if(score < last_score){
3543                             last_score= score;
3544                             last_run= run;
3545                             last_level= level-64;
3546                             last_i= i+1;
3547                         }
3548                     }
3549                 }
3550             }
3551         }
3552
3553         score_tab[i+1]= best_score;
3554
         /* drop survivors from the end of the list whose score is worse than
          * the new best score (plus lambda when last_non_zero > 27), then
          * append position i+1 */
3555         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3556         if(last_non_zero <= 27){
3557             for(; survivor_count; survivor_count--){
3558                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3559                     break;
3560             }
3561         }else{
3562             for(; survivor_count; survivor_count--){
3563                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3564                     break;
3565             }
3566         }
3567
3568         survivor[ survivor_count++ ]= i+1;
3569     }
3570
     /* formats other than FMT_H263: pick the end position afterwards, adding
      * 2*lambda to the score of every position other than 0 */
3571     if(s->out_format != FMT_H263){
3572         last_score= 256*256*256*120;
3573         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3574             int score= score_tab[i];
3575             if(i) score += lambda*2; //FIXME exacter?
3576
3577             if(score < last_score){
3578                 last_score= score;
3579                 last_i= i;
3580                 last_level= level_tab[i];
3581                 last_run= run_tab[i];
3582             }
3583         }
3584     }
3585
3586     s->coded_score[n] = last_score;
3587
     /* remember |block[0]| before the coefficients are cleared */
3588     dc= FFABS(block[0]);
3589     last_non_zero= last_i - 1;
3590     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3591
3592     if(last_non_zero < start_i)
3593         return last_non_zero;
3594
     /* special case: only position 0 of a block whose search started at 0
      * remains; re-evaluate its candidates against coding nothing at all */
3595     if(last_non_zero == 0 && start_i == 0){
3596         int best_level= 0;
3597         int best_score= dc * dc;
3598
3599         for(i=0; i<coeff_count[0]; i++){
3600             int level= coeff[i][0];
3601             int alevel= FFABS(level);
3602             int unquant_coeff, score, distortion;
3603
3604             if(s->out_format == FMT_H263){
3605                     unquant_coeff= (alevel*qmul + qadd)>>3;
3606             }else{ //MPEG1
3607                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3608                     unquant_coeff =   (unquant_coeff - 1) | 1;
3609             }
3610             unquant_coeff = (unquant_coeff + 4) >> 3;
3611             unquant_coeff<<= 3 + 3;
3612
3613             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3614             level+=64;
3615             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3616             else                    score= distortion + esc_length*lambda;
3617
3618             if(score < best_score){
3619                 best_score= score;
3620                 best_level= level - 64;
3621             }
3622         }
3623         block[0]= best_level;
3624         s->coded_score[n] = best_score - dc*dc;
3625         if(best_level == 0) return -1;
3626         else                return last_non_zero;
3627     }
3628
     /* backtrack: store the last level, then follow run_tab towards the
      * start and write the recorded level for each visited position */
3629     i= last_i;
3630     assert(last_level);
3631
3632     block[ perm_scantable[last_non_zero] ]= last_level;
3633     i -= last_run + 1;
3634
3635     for(; i>start_i; i -= run_tab[i] + 1){
3636         block[ perm_scantable[i-1] ]= level_tab[i];
3637     }
3638
3639     return last_non_zero;
3640 }
3641
3642 //#define REFINE_STATS 1
3643 static int16_t basis[64][64];
3644
3645 static void build_basis(uint8_t *perm){
3646     int i, j, x, y;
3647     emms_c();
3648     for(i=0; i<8; i++){
3649         for(j=0; j<8; j++){
3650             for(y=0; y<8; y++){
3651                 for(x=0; x<8; x++){
3652                     double s= 0.25*(1<<BASIS_SHIFT);
3653                     int index= 8*i + j;
3654                     int perm_index= perm[index];
3655                     if(i==0) s*= sqrt(0.5);
3656                     if(j==0) s*= sqrt(0.5);
3657                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3658                 }
3659             }
3660         }
3661     }
3662 }
3663
3664 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3665                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3666                         int n, int qscale){
3667     int16_t rem[64];
3668     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3669     const uint8_t *scantable= s->intra_scantable.scantable;
3670     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3671 //    unsigned int threshold1, threshold2;
3672 //    int bias=0;
3673     int run_tab[65];
3674     int prev_run=0;
3675     int prev_level=0;
3676     int qmul, qadd, start_i, last_non_zero, i, dc;
3677     uint8_t * length;
3678     uint8_t * last_length;
3679     int lambda;
3680     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3681 #ifdef REFINE_STATS
3682 static int count=0;
3683 static int after_last=0;
3684 static int to_zero=0;
3685 static int from_zero=0;
3686 static int raise=0;
3687 static int lower=0;
3688 static int messed_sign=0;
3689 #endif
3690
3691     if(basis[0][0] == 0)
3692         build_basis(s->dsp.idct_permutation);
3693
3694     qmul= qscale*2;
3695     qadd= (qscale-1)|1;
3696     if (s->mb_intra) {
3697         if (!s->h263_aic) {
3698             if (n < 4)
3699                 q = s->y_dc_scale;
3700             else
3701                 q = s->c_dc_scale;
3702         } else{
3703             /* For AIC we skip quant/dequant of INTRADC */
3704             q = 1;
3705             qadd=0;
3706         }
3707         q <<= RECON_SHIFT-3;
3708         /* note: block[0] is assumed to be positive */
3709         dc= block[0]*q;
3710 //        block[0] = (block[0] + (q >> 1)) / q;
3711         start_i = 1;
3712 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3713 //            bias= 1<<(QMAT_SHIFT-1);
3714         length     = s->intra_ac_vlc_length;
3715         last_length= s->intra_ac_vlc_last_length;
3716     } else {
3717         dc= 0;
3718         start_i = 0;
3719         length     = s->inter_ac_vlc_length;
3720         last_length= s->inter_ac_vlc_last_length;
3721     }
3722     last_non_zero = s->block_last_index[n];
3723
3724 #ifdef REFINE_STATS
3725 {START_TIMER
3726 #endif
3727     dc += (1<<(RECON_SHIFT-1));
3728     for(i=0; i<64; i++){
3729         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3730     }
3731 #ifdef REFINE_STATS
3732 STOP_TIMER("memset rem[]")}
3733 #endif
3734     sum=0;
3735     for(i=0; i<64; i++){
3736         int one= 36;
3737         int qns=4;
3738         int w;
3739
3740         w= FFABS(weight[i]) + qns*one;
3741         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3742
3743         weight[i] = w;
3744 //        w=weight[i] = (63*qns + (w/2)) / w;
3745
3746         assert(w>0);
3747         assert(w<(1<<6));
3748         sum += w*w;
3749     }
3750     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3751 #ifdef REFINE_STATS
3752 {START_TIMER
3753 #endif
3754     run=0;
3755     rle_index=0;
3756     for(i=start_i; i<=last_non_zero; i++){
3757         int j= perm_scantable[i];
3758         const int level= block[j];
3759         int coeff;
3760
3761         if(level){
3762             if(level<0) coeff= qmul*level - qadd;
3763             else        coeff= qmul*level + qadd;
3764             run_tab[rle_index++]=run;
3765             run=0;
3766
3767             s->dsp.add_8x8basis(rem, basis[j], coeff);
3768         }else{
3769             run++;
3770         }
3771     }
3772 #ifdef REFINE_STATS
3773 if(last_non_zero>0){
3774 STOP_TIMER("init rem[]")
3775 }
3776 }
3777
3778 {START_TIMER
3779 #endif
3780     for(;;){
3781         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3782         int best_coeff=0;
3783         int best_change=0;
3784         int run2, best_unquant_change=0, analyze_gradient;
3785 #ifdef REFINE_STATS
3786 {START_TIMER
3787 #endif
3788         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3789
3790         if(analyze_gradient){
3791 #ifdef REFINE_STATS
3792 {START_TIMER
3793 #endif
3794             for(i=0; i<64; i++){
3795                 int w= weight[i];
3796
3797                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3798             }
3799 #ifdef REFINE_STATS
3800 STOP_TIMER("rem*w*w")}
3801 {START_TIMER
3802 #endif
3803             s->dsp.fdct(d1);
3804 #ifdef REFINE_STATS
3805 STOP_TIMER("dct")}
3806 #endif
3807         }
3808
3809         if(start_i){
3810             const int level= block[0];
3811             int change, old_coeff;
3812
3813             assert(s->mb_intra);
3814
3815             old_coeff= q*level;
3816
3817             for(change=-1; change<=1; change+=2){
3818                 int new_level= level + change;
3819                 int score, new_coeff;
3820
3821                 new_coeff= q*new_level;
3822                 if(new_coeff >= 2048 || new_coeff < 0)
3823                     continue;
3824
3825                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3826                 if(score<best_score){
3827                     best_score= score;
3828                     best_coeff= 0;
3829                     best_change= change;
3830                     best_unquant_change= new_coeff - old_coeff;
3831                 }
3832             }
3833         }
3834
3835         run=0;
3836         rle_index=0;
3837         run2= run_tab[rle_index++];
3838         prev_level=0;
3839         prev_run=0;
3840
3841         for(i=start_i; i<64; i++){
3842             int j= perm_scantable[i];
3843             const int level= block[j];
3844             int change, old_coeff;
3845
3846             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3847                 break;
3848
3849             if(level){
3850                 if(level<0) old_coeff= qmul*level - qadd;
3851                 else        old_coeff= qmul*level + qadd;
3852                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3853             }else{
3854                 old_coeff=0;
3855                 run2--;
3856                 assert(run2>=0 || i >= last_non_zero );
3857             }
3858
3859             for(change=-1; change<=1; change+=2){
3860                 int new_level= level + change;
3861                 int score, new_coeff, unquant_change;
3862
3863                 score=0;
3864                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3865                    continue;
3866
3867                 if(new_level){
3868                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3869                     else            new_coeff= qmul*new_level + qadd;
3870                     if(new_coeff >= 2048 || new_coeff <= -2048)
3871                         continue;
3872                     //FIXME check for overflow
3873
3874                     if(level){
3875                         if(level < 63 && level > -63){
3876                             if(i < last_non_zero)
3877                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3878                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3879                             else
3880                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3881                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3882                         }
3883                     }else{
3884                         assert(FFABS(new_level)==1);
3885
3886                         if(analyze_gradient){
3887                             int g= d1[ scantable[i] ];
3888                             if(g && (g^new_level) >= 0)
3889                                 continue;
3890                         }
3891
3892                         if(i < last_non_zero){
3893                             int next_i= i + run2 + 1;
3894                             int next_level= block[ perm_scantable[next_i] ] + 64;
3895
3896                             if(next_level&(~127))
3897                                 next_level= 0;
3898
3899                             if(next_i < last_non_zero)
3900                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3901                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3902                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3903                             else
3904                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3905                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3906                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3907                         }else{
3908                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3909                             if(prev_level){
3910                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3911                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3912                             }
3913                         }
3914                     }
3915                 }else{
3916                     new_coeff=0;
3917                     assert(FFABS(level)==1);
3918
3919                     if(i < last_non_zero){
3920                         int next_i= i + run2 + 1;
3921                         int next_level= block[ perm_scantable[next_i] ] + 64;
3922
3923                         if(next_level&(~127))
3924                             next_level= 0;
3925
3926                         if(next_i < last_non_zero)
3927                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3928                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3929                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3930                         else
3931                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3932                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3933                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3934                     }else{
3935                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3936                         if(prev_level){
3937                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3938                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3939                         }
3940                     }
3941                 }
3942
3943                 score *= lambda;
3944
3945                 unquant_change= new_coeff - old_coeff;
3946                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3947
3948                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3949                 if(score<best_score){
3950                     best_score= score;
3951                     best_coeff= i;
3952                     best_change= change;
3953                     best_unquant_change= unquant_change;
3954                 }
3955             }
3956             if(level){
3957                 prev_level= level + 64;
3958                 if(prev_level&(~127))
3959                     prev_level= 0;
3960                 prev_run= run;
3961                 run=0;
3962             }else{
3963                 run++;
3964             }
3965         }
3966 #ifdef REFINE_STATS
3967 STOP_TIMER("iterative step")}
3968 #endif
3969
3970         if(best_change){
3971             int j= perm_scantable[ best_coeff ];
3972
3973             block[j] += best_change;
3974
3975             if(best_coeff > last_non_zero){
3976                 last_non_zero= best_coeff;
3977                 assert(block[j]);
3978 #ifdef REFINE_STATS
3979 after_last++;
3980 #endif
3981             }else{
3982 #ifdef REFINE_STATS
3983 if(block[j]){
3984     if(block[j] - best_change){
3985         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3986             raise++;
3987         }else{
3988             lower++;
3989         }
3990     }else{
3991         from_zero++;
3992     }
3993 }else{
3994     to_zero++;
3995 }
3996 #endif
3997                 for(; last_non_zero>=start_i; last_non_zero--){
3998                     if(block[perm_scantable[last_non_zero]])
3999                         break;
4000                 }
4001             }
4002 #ifdef REFINE_STATS
4003 count++;
4004 if(256*256*256*64 % count == 0){
4005     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4006 }
4007 #endif
4008             run=0;
4009             rle_index=0;
4010             for(i=start_i; i<=last_non_zero; i++){
4011                 int j= perm_scantable[i];
4012                 const int level= block[j];
4013
4014                  if(level){
4015                      run_tab[rle_index++]=run;
4016                      run=0;
4017                  }else{
4018                      run++;
4019                  }
4020             }
4021
4022             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4023         }else{
4024             break;
4025         }
4026     }
4027 #ifdef REFINE_STATS
4028 if(last_non_zero>0){
4029 STOP_TIMER("iterative search")
4030 }
4031 }
4032 #endif
4033
4034     return last_non_zero;
4035 }
4036
/**
 * Forward-transform and quantize one block (plain C implementation).
 *
 * Runs s->dsp.fdct() on block in place, applies s->denoise_dct() when
 * s->dct_error_sum is set, quantizes the coefficients with the intra or inter
 * matrix selected by qscale, and finally permutes the nonzero coefficients
 * when the IDCT in use requires a permuted layout.
 *
 * @param s        encoder context; supplies the DCT, the scan table, the
 *                 quantization matrices, the quantizer biases and DC scales
 * @param block    data to transform; overwritten in place with the quantized
 *                 coefficients
 * @param n        block index; for intra blocks without h263_aic, n < 4
 *                 selects s->y_dc_scale and any other value s->c_dc_scale
 * @param qscale   index into s->q_intra_matrix / s->q_inter_matrix
 * @param overflow receives nonzero when a quantized magnitude may exceed
 *                 s->max_qcoeff, 0 otherwise
 * @return scan position of the last nonzero coefficient; intra blocks return
 *         at least 0 (the DC position), inter blocks return -1 when every
 *         coefficient quantized to zero
 */
4037 int ff_dct_quantize_c(MpegEncContext *s,
4038                         DCTELEM *block, int n,
4039                         int qscale, int *overflow)
4040 {
4041     int i, j, level, last_non_zero, q, start_i;
4042     const int *qmat;
4043     const uint8_t *scantable= s->intra_scantable.scantable; /* used for intra and inter blocks alike */
4044     int bias;
4045     int max=0;
4046     unsigned int threshold1, threshold2;
4047
4048     s->dsp.fdct (block);
4049
4050     if(s->dct_error_sum)
4051         s->denoise_dct(s, block);
4052
4053     if (s->mb_intra) {
4054         if (!s->h263_aic) {
4055             if (n < 4)
4056                 q = s->y_dc_scale;
4057             else
4058                 q = s->c_dc_scale;
4059             q = q << 3;
4060         } else
4061             /* For AIC we skip quant/dequant of INTRADC */
4062             q = 1 << 3;
4063
4064         /* note: block[0] is assumed to be positive, so adding q/2 rounds to nearest */
4065         block[0] = (block[0] + (q >> 1)) / q;
4066         start_i = 1;
4067         last_non_zero = 0;
4068         qmat = s->q_intra_matrix[qscale];
             /* multiply rather than shift: with a negative bias a left shift
              * would be undefined behaviour */
4069         bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4070     } else {
4071         start_i = 0;
4072         last_non_zero = -1;
4073         qmat = s->q_inter_matrix[qscale];
             /* multiply rather than shift, see the intra branch */
4074         bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4075     }
         /* (unsigned)(level+threshold1) > threshold2 is a single-compare test
          * for level < -threshold1 || level > threshold1, i.e. the scaled
          * coefficient is large enough not to quantize to zero */
4076     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4077     threshold2= (threshold1<<1);
         /* walk backwards in scan order: clear trailing coefficients that
          * quantize to zero and stop at the first one that survives */
4078     for(i=63;i>=start_i;i--) {
4079         j = scantable[i];
4080         level = block[j] * qmat[j];
4081
4082         if(((unsigned)(level+threshold1))>threshold2){
4083             last_non_zero = i;
4084             break;
4085         }else{
4086             block[j]=0;
4087         }
4088     }
         /* quantize everything up to and including the last survivor */
4089     for(i=start_i; i<=last_non_zero; i++) {
4090         j = scantable[i];
4091         level = block[j] * qmat[j];
4092
4093 //        the test below is equivalent to:
4094 //        bias+level >= (1<<QMAT_SHIFT) || bias-level >= (1<<QMAT_SHIFT)
4095         if(((unsigned)(level+threshold1))>threshold2){
4096             if(level>0){
4097                 level= (bias + level)>>QMAT_SHIFT;
4098                 block[j]= level;
4099             }else{
4100                 level= (bias - level)>>QMAT_SHIFT;
4101                 block[j]= -level;
4102             }
4103             max |=level; /* OR of the magnitudes: never smaller than the largest one */
4104         }else{
4105             block[j]=0;
4106         }
4107     }
4108     *overflow= s->max_qcoeff < max; //overflow might have happened
4109
4110     /* the IDCT may expect a permuted coefficient layout; only the nonzero elements are permuted */
4111     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4112         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4113
4114     return last_non_zero;
4115 }
4116
/* Byte offset of field x inside MpegEncContext, for the option tables below. */
4117 #define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags used by every entry of the option tables below. The expansion
 * is parenthesized so it stays a single operand wherever the macro is used. */
4118 #define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
/*
 * Private options of the H.263 encoder. Every entry is an integer option
 * with default 0 whose value is stored in the MpegEncContext field named by
 * OFFSET(): "obmc" and "structured_slices" accept 0..1, "mb_info" accepts
 * 0..INT_MAX. FF_MPV_COMMON_OPTS is defined outside this chunk (presumably
 * the options shared by the MPEG-video encoders -- confirm against its
 * definition); the { NULL } entry terminates the table.
 */
4119 static const AVOption h263_options[] = {
4120     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4121     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4122     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4123     FF_MPV_COMMON_OPTS
4124     { NULL },
4125 };
4126
/* AVClass of the H.263 encoder's private context: carries the class name
 * "H.263 encoder" and publishes h263_options as its option table. */
4127 static const AVClass h263_class = {
4128     .class_name = "H.263 encoder",
4129     .item_name  = av_default_item_name,
4130     .option     = h263_options,
4131     .version    = LIBAVUTIL_VERSION_INT,
4132 };
4133
/* Codec descriptor of the "h263" video encoder. It uses the ff_MPV_encode_*
 * entry points with a MpegEncContext as private data, lists
 * AV_PIX_FMT_YUV420P as its only pixel format (the list is terminated by
 * AV_PIX_FMT_NONE) and exposes its options through h263_class. */
4134 AVCodec ff_h263_encoder = {
4135     .name           = "h263",
4136     .type           = AVMEDIA_TYPE_VIDEO,
4137     .id             = AV_CODEC_ID_H263,
4138     .priv_data_size = sizeof(MpegEncContext),
4139     .init           = ff_MPV_encode_init,
4140     .encode2        = ff_MPV_encode_picture,
4141     .close          = ff_MPV_encode_end,
4142     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4143     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4144     .priv_class     = &h263_class,
4145 };
4146
/*
 * Private options of the H.263+ encoder: "umv", "aiv", "obmc" and
 * "structured_slices". Each is an integer limited to 0..1 with default 0,
 * stored in the MpegEncContext field named by OFFSET(). FF_MPV_COMMON_OPTS
 * is defined outside this chunk (presumably the options shared by the
 * MPEG-video encoders -- confirm against its definition); the { NULL } entry
 * terminates the table.
 */
4147 static const AVOption h263p_options[] = {
4148     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4149     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4150     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4151     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4152     FF_MPV_COMMON_OPTS
4153     { NULL },
4154 };
/* AVClass of the H.263+ encoder's private context: carries the class name
 * "H.263p encoder" and publishes h263p_options as its option table. */
4155 static const AVClass h263p_class = {
4156     .class_name = "H.263p encoder",
4157     .item_name  = av_default_item_name,
4158     .option     = h263p_options,
4159     .version    = LIBAVUTIL_VERSION_INT,
4160 };
4161
/* Codec descriptor of the "h263p" video encoder (H.263+ according to
 * long_name). It uses the ff_MPV_encode_* entry points with a MpegEncContext
 * as private data, sets CODEC_CAP_SLICE_THREADS in .capabilities, lists
 * AV_PIX_FMT_YUV420P as its only pixel format and exposes its options
 * through h263p_class. */
4162 AVCodec ff_h263p_encoder = {
4163     .name           = "h263p",
4164     .type           = AVMEDIA_TYPE_VIDEO,
4165     .id             = AV_CODEC_ID_H263P,
4166     .priv_data_size = sizeof(MpegEncContext),
4167     .init           = ff_MPV_encode_init,
4168     .encode2        = ff_MPV_encode_picture,
4169     .close          = ff_MPV_encode_end,
4170     .capabilities   = CODEC_CAP_SLICE_THREADS,
4171     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4172     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4173     .priv_class     = &h263p_class,
4174 };
4175
/* FF_MPV_GENERIC_CLASS is defined outside this chunk; presumably it declares
 * the msmpeg4v2_class object referenced by .priv_class below -- confirm
 * against the macro definition. */
4176 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4177
/* Codec descriptor of the "msmpeg4v2" video encoder. It uses the
 * ff_MPV_encode_* entry points with a MpegEncContext as private data and
 * lists AV_PIX_FMT_YUV420P as its only pixel format. */
4178 AVCodec ff_msmpeg4v2_encoder = {
4179     .name           = "msmpeg4v2",
4180     .type           = AVMEDIA_TYPE_VIDEO,
4181     .id             = AV_CODEC_ID_MSMPEG4V2,
4182     .priv_data_size = sizeof(MpegEncContext),
4183     .init           = ff_MPV_encode_init,
4184     .encode2        = ff_MPV_encode_picture,
4185     .close          = ff_MPV_encode_end,
4186     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4187     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4188     .priv_class     = &msmpeg4v2_class,
4189 };
4190
/* FF_MPV_GENERIC_CLASS is defined outside this chunk; presumably it declares
 * the msmpeg4v3_class object referenced by .priv_class below -- confirm
 * against the macro definition. */
4191 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4192
/* Codec descriptor of the MSMPEG4 version 3 video encoder. Note that the
 * codec name is "msmpeg4", without a version suffix, while the symbol and
 * the codec id carry the "v3" / "V3" suffix. It uses the ff_MPV_encode_*
 * entry points with a MpegEncContext as private data and lists
 * AV_PIX_FMT_YUV420P as its only pixel format. */
4193 AVCodec ff_msmpeg4v3_encoder = {
4194     .name           = "msmpeg4",
4195     .type           = AVMEDIA_TYPE_VIDEO,
4196     .id             = AV_CODEC_ID_MSMPEG4V3,
4197     .priv_data_size = sizeof(MpegEncContext),
4198     .init           = ff_MPV_encode_init,
4199     .encode2        = ff_MPV_encode_picture,
4200     .close          = ff_MPV_encode_end,
4201     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4202     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4203     .priv_class     = &msmpeg4v3_class,
4204 };
4205
/* FF_MPV_GENERIC_CLASS is defined outside this chunk; presumably it declares
 * the wmv1_class object referenced by .priv_class below -- confirm against
 * the macro definition. */
4206 FF_MPV_GENERIC_CLASS(wmv1)
4207
/* Codec descriptor of the "wmv1" video encoder (long name "Windows Media
 * Video 7"). It uses the ff_MPV_encode_* entry points with a MpegEncContext
 * as private data and lists AV_PIX_FMT_YUV420P as its only pixel format. */
4208 AVCodec ff_wmv1_encoder = {
4209     .name           = "wmv1",
4210     .type           = AVMEDIA_TYPE_VIDEO,
4211     .id             = AV_CODEC_ID_WMV1,
4212     .priv_data_size = sizeof(MpegEncContext),
4213     .init           = ff_MPV_encode_init,
4214     .encode2        = ff_MPV_encode_picture,
4215     .close          = ff_MPV_encode_end,
4216     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4217     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4218     .priv_class     = &wmv1_class,
4219 };