]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge remote-tracking branch 'qatar/master'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "mpegvideo_common.h"
37 #include "h263.h"
38 #include "mjpegenc.h"
39 #include "msmpeg4.h"
40 #include "faandct.h"
41 #include "thread.h"
42 #include "aandcttab.h"
43 #include "flv.h"
44 #include "mpeg4video.h"
45 #include "internal.h"
46 #include "bytestream.h"
47 #include <limits.h>
48 #include "sp5x.h"
49
50 //#undef NDEBUG
51 //#include <assert.h>
52
53 static int encode_picture(MpegEncContext *s, int picture_number);
54 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
55 static int sse_mb(MpegEncContext *s);
56 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
57 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
58
59 /* enable all paranoid tests for rounding, overflows, etc... */
60 //#define PARANOID
61
62 //#define DEBUG
63
/* File-scope tables shared by all encoder instances:
 * MPV_encode_defaults() installs default_mv_penalty as s->me.mv_penalty
 * and default_fcode_tab as s->fcode_tab, after setting the 32
 * default_fcode_tab entries MAX_MV - 16 .. MAX_MV + 15 to 1. */
64 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
65 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
66
/* AVOption table consisting of the FF_MPV_COMMON_OPTS entries followed by
 * a { NULL } terminator entry. */
67 const AVOption ff_mpv_generic_options[] = {
68     FF_MPV_COMMON_OPTS
69     { NULL },
70 };
71
72 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
73                        uint16_t (*qmat16)[2][64],
74                        const uint16_t *quant_matrix,
75                        int bias, int qmin, int qmax, int intra)
76 {
77     int qscale;
78     int shift = 0;
79
80     for (qscale = qmin; qscale <= qmax; qscale++) {
81         int i;
82         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
83             dsp->fdct == ff_jpeg_fdct_islow_10 ||
84             dsp->fdct == ff_faandct) {
85             for (i = 0; i < 64; i++) {
86                 const int j = dsp->idct_permutation[i];
87                 /* 16 <= qscale * quant_matrix[i] <= 7905
88                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
89                  *             19952 <=              x  <= 249205026
90                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
91                  *           3444240 >= (1 << 36) / (x) >= 275 */
92
93                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
94                                         (qscale * quant_matrix[j]));
95             }
96         } else if (dsp->fdct == ff_fdct_ifast) {
97             for (i = 0; i < 64; i++) {
98                 const int j = dsp->idct_permutation[i];
99                 /* 16 <= qscale * quant_matrix[i] <= 7905
100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
101                  *             19952 <=              x  <= 249205026
102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
103                  *           3444240 >= (1 << 36) / (x) >= 275 */
104
105                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
106                                         (ff_aanscales[i] * qscale * quant_matrix[j]));
107             }
108         } else {
109             for (i = 0; i < 64; i++) {
110                 const int j = dsp->idct_permutation[i];
111                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
112                  * Assume x = qscale * quant_matrix[i]
113                  * So             16 <=              x  <= 7905
114                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
115                  * so          32768 >= (1 << 19) / (x) >= 67 */
116                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
117                                         (qscale * quant_matrix[j]));
118                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
119                 //                    (qscale * quant_matrix[i]);
120                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
121                                        (qscale * quant_matrix[j]);
122
123                 if (qmat16[qscale][0][i] == 0 ||
124                     qmat16[qscale][0][i] == 128 * 256)
125                     qmat16[qscale][0][i] = 128 * 256 - 1;
126                 qmat16[qscale][1][i] =
127                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
128                                 qmat16[qscale][0][i]);
129             }
130         }
131
132         for (i = intra; i < 64; i++) {
133             int64_t max = 8191;
134             if (dsp->fdct == ff_fdct_ifast) {
135                 max = (8191LL * ff_aanscales[i]) >> 14;
136             }
137             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
138                 shift++;
139             }
140         }
141     }
142     if (shift) {
143         av_log(NULL, AV_LOG_INFO,
144                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
145                QMAT_SHIFT - shift);
146     }
147 }
148
149 static inline void update_qscale(MpegEncContext *s)
150 {
151     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
152                 (FF_LAMBDA_SHIFT + 7);
153     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
154
155     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
156                  FF_LAMBDA_SHIFT;
157 }
158
159 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
160 {
161     int i;
162
163     if (matrix) {
164         put_bits(pb, 1, 1);
165         for (i = 0; i < 64; i++) {
166             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
167         }
168     } else
169         put_bits(pb, 1, 0);
170 }
171
172 /**
173  * init s->current_picture.qscale_table from s->lambda_table
174  */
175 void ff_init_qscale_tab(MpegEncContext *s)
176 {
177     int8_t * const qscale_table = s->current_picture.f.qscale_table;
178     int i;
179
180     for (i = 0; i < s->mb_num; i++) {
181         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
182         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
183         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
184                                                   s->avctx->qmax);
185     }
186 }
187
188 static void copy_picture_attributes(MpegEncContext *s,
189                                     AVFrame *dst,
190                                     AVFrame *src)
191 {
192     int i;
193
194     dst->pict_type              = src->pict_type;
195     dst->quality                = src->quality;
196     dst->coded_picture_number   = src->coded_picture_number;
197     dst->display_picture_number = src->display_picture_number;
198     //dst->reference              = src->reference;
199     dst->pts                    = src->pts;
200     dst->interlaced_frame       = src->interlaced_frame;
201     dst->top_field_first        = src->top_field_first;
202
203     if (s->avctx->me_threshold) {
204         if (!src->motion_val[0])
205             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
206         if (!src->mb_type)
207             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
208         if (!src->ref_index[0])
209             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
210         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
211             av_log(s->avctx, AV_LOG_ERROR,
212                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
213                    src->motion_subsample_log2, dst->motion_subsample_log2);
214
215         memcpy(dst->mb_type, src->mb_type,
216                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
217
218         for (i = 0; i < 2; i++) {
219             int stride = ((16 * s->mb_width ) >>
220                           src->motion_subsample_log2) + 1;
221             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
222
223             if (src->motion_val[i] &&
224                 src->motion_val[i] != dst->motion_val[i]) {
225                 memcpy(dst->motion_val[i], src->motion_val[i],
226                        2 * stride * height * sizeof(int16_t));
227             }
228             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
229                 memcpy(dst->ref_index[i], src->ref_index[i],
230                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
231             }
232         }
233     }
234 }
235
236 static void update_duplicate_context_after_me(MpegEncContext *dst,
237                                               MpegEncContext *src)
238 {
239 #define COPY(a) dst->a= src->a
240     COPY(pict_type);
241     COPY(current_picture);
242     COPY(f_code);
243     COPY(b_code);
244     COPY(qscale);
245     COPY(lambda);
246     COPY(lambda2);
247     COPY(picture_in_gop_number);
248     COPY(gop_picture_number);
249     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
250     COPY(progressive_frame);    // FIXME don't set in encode_header
251     COPY(partitioned_frame);    // FIXME don't set in encode_header
252 #undef COPY
253 }
254
255 /**
256  * Set the given MpegEncContext to defaults for encoding.
257  * the changed fields will not depend upon the prior state of the MpegEncContext.
258  */
259 static void MPV_encode_defaults(MpegEncContext *s)
260 {
261     int i;
262     ff_MPV_common_defaults(s);
263
264     for (i = -16; i < 16; i++) {
265         default_fcode_tab[i + MAX_MV] = 1;
266     }
267     s->me.mv_penalty = default_mv_penalty;
268     s->fcode_tab     = default_fcode_tab;
269 }
270
271 /* init video encoder */
272 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
273 {
274     MpegEncContext *s = avctx->priv_data;
275     int i;
276     int chroma_h_shift, chroma_v_shift;
277
278     MPV_encode_defaults(s);
279
280     switch (avctx->codec_id) {
281     case CODEC_ID_MPEG2VIDEO:
282         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
283             avctx->pix_fmt != PIX_FMT_YUV422P) {
284             av_log(avctx, AV_LOG_ERROR,
285                    "only YUV420 and YUV422 are supported\n");
286             return -1;
287         }
288         break;
289     case CODEC_ID_LJPEG:
290         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
291             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
292             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
293             avctx->pix_fmt != PIX_FMT_BGR0     &&
294             avctx->pix_fmt != PIX_FMT_BGRA     &&
295             avctx->pix_fmt != PIX_FMT_BGR24    &&
296             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
297               avctx->pix_fmt != PIX_FMT_YUV422P &&
298               avctx->pix_fmt != PIX_FMT_YUV444P) ||
299              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
300             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
301             return -1;
302         }
303         break;
304     case CODEC_ID_MJPEG:
305     case CODEC_ID_AMV:
306         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
307             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
308             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
309               avctx->pix_fmt != PIX_FMT_YUV422P) ||
310              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
311             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
312             return -1;
313         }
314         break;
315     default:
316         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
317             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
318             return -1;
319         }
320     }
321
322     switch (avctx->pix_fmt) {
323     case PIX_FMT_YUVJ422P:
324     case PIX_FMT_YUV422P:
325         s->chroma_format = CHROMA_422;
326         break;
327     case PIX_FMT_YUVJ420P:
328     case PIX_FMT_YUV420P:
329     default:
330         s->chroma_format = CHROMA_420;
331         break;
332     }
333
334     s->bit_rate = avctx->bit_rate;
335     s->width    = avctx->width;
336     s->height   = avctx->height;
337     if (avctx->gop_size > 600 &&
338         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
339         av_log(avctx, AV_LOG_WARNING,
340                "keyframe interval too large!, reducing it from %d to %d\n",
341                avctx->gop_size, 600);
342         avctx->gop_size = 600;
343     }
344     s->gop_size     = avctx->gop_size;
345     s->avctx        = avctx;
346     s->flags        = avctx->flags;
347     s->flags2       = avctx->flags2;
348     s->max_b_frames = avctx->max_b_frames;
349     s->codec_id     = avctx->codec->id;
350 #if FF_API_MPV_GLOBAL_OPTS
351     if (avctx->luma_elim_threshold)
352         s->luma_elim_threshold   = avctx->luma_elim_threshold;
353     if (avctx->chroma_elim_threshold)
354         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
355 #endif
356     s->strict_std_compliance = avctx->strict_std_compliance;
357     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
358     s->mpeg_quant         = avctx->mpeg_quant;
359     s->rtp_mode           = !!avctx->rtp_payload_size;
360     s->intra_dc_precision = avctx->intra_dc_precision;
361     s->user_specified_pts = AV_NOPTS_VALUE;
362
363     if (s->gop_size <= 1) {
364         s->intra_only = 1;
365         s->gop_size   = 12;
366     } else {
367         s->intra_only = 0;
368     }
369
370     s->me_method = avctx->me_method;
371
372     /* Fixed QSCALE */
373     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
374
375 #if FF_API_MPV_GLOBAL_OPTS
376     if (s->flags & CODEC_FLAG_QP_RD)
377         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
378 #endif
379
380     s->adaptive_quant = (s->avctx->lumi_masking ||
381                          s->avctx->dark_masking ||
382                          s->avctx->temporal_cplx_masking ||
383                          s->avctx->spatial_cplx_masking  ||
384                          s->avctx->p_masking      ||
385                          s->avctx->border_masking ||
386                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
387                         !s->fixed_qscale;
388
389     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
390
391     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
392         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
393         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
394             return -1;
395     }
396
397     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
398         av_log(avctx, AV_LOG_INFO,
399                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
400     }
401
402     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
403         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
404         return -1;
405     }
406
407     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
408         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
409         return -1;
410     }
411
412     if (avctx->rc_max_rate &&
413         avctx->rc_max_rate == avctx->bit_rate &&
414         avctx->rc_max_rate != avctx->rc_min_rate) {
415         av_log(avctx, AV_LOG_INFO,
416                "impossible bitrate constraints, this will fail\n");
417     }
418
419     if (avctx->rc_buffer_size &&
420         avctx->bit_rate * (int64_t)avctx->time_base.num >
421             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
422         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
423         return -1;
424     }
425
426     if (!s->fixed_qscale &&
427         avctx->bit_rate * av_q2d(avctx->time_base) >
428             avctx->bit_rate_tolerance) {
429         av_log(avctx, AV_LOG_ERROR,
430                "bitrate tolerance too small for bitrate\n");
431         return -1;
432     }
433
434     if (s->avctx->rc_max_rate &&
435         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
436         (s->codec_id == CODEC_ID_MPEG1VIDEO ||
437          s->codec_id == CODEC_ID_MPEG2VIDEO) &&
438         90000LL * (avctx->rc_buffer_size - 1) >
439             s->avctx->rc_max_rate * 0xFFFFLL) {
440         av_log(avctx, AV_LOG_INFO,
441                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
442                "specified vbv buffer is too large for the given bitrate!\n");
443     }
444
445     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != CODEC_ID_MPEG4 &&
446         s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P &&
447         s->codec_id != CODEC_ID_FLV1) {
448         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
449         return -1;
450     }
451
452     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
453         av_log(avctx, AV_LOG_ERROR,
454                "OBMC is only supported with simple mb decision\n");
455         return -1;
456     }
457
458     if (s->quarter_sample && s->codec_id != CODEC_ID_MPEG4) {
459         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
460         return -1;
461     }
462
463     if (s->max_b_frames                    &&
464         s->codec_id != CODEC_ID_MPEG4      &&
465         s->codec_id != CODEC_ID_MPEG1VIDEO &&
466         s->codec_id != CODEC_ID_MPEG2VIDEO) {
467         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
468         return -1;
469     }
470
471     if ((s->codec_id == CODEC_ID_MPEG4 ||
472          s->codec_id == CODEC_ID_H263  ||
473          s->codec_id == CODEC_ID_H263P) &&
474         (avctx->sample_aspect_ratio.num > 255 ||
475          avctx->sample_aspect_ratio.den > 255)) {
476         av_log(avctx, AV_LOG_WARNING,
477                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
478                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
479         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
480                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
481     }
482
483     if ((s->codec_id == CODEC_ID_H263  ||
484          s->codec_id == CODEC_ID_H263P) &&
485         (avctx->width  > 2048 ||
486          avctx->height > 1152 )) {
487         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
488         return -1;
489     }
490     if ((s->codec_id == CODEC_ID_H263  ||
491          s->codec_id == CODEC_ID_H263P) &&
492         ((avctx->width &3) ||
493          (avctx->height&3) )) {
494         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
495         return -1;
496     }
497
498     if (s->codec_id == CODEC_ID_MPEG1VIDEO &&
499         (avctx->width  > 4095 ||
500          avctx->height > 4095 )) {
501         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
502         return -1;
503     }
504
505     if (s->codec_id == CODEC_ID_MPEG2VIDEO &&
506         (avctx->width  > 16383 ||
507          avctx->height > 16383 )) {
508         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
509         return -1;
510     }
511
512     if ((s->codec_id == CODEC_ID_WMV1 ||
513          s->codec_id == CODEC_ID_WMV2) &&
514          avctx->width & 1) {
515          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
516          return -1;
517     }
518
519     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
520         s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO) {
521         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
522         return -1;
523     }
524
525     // FIXME mpeg2 uses that too
526     if (s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4) {
527         av_log(avctx, AV_LOG_ERROR,
528                "mpeg2 style quantization not supported by codec\n");
529         return -1;
530     }
531
532 #if FF_API_MPV_GLOBAL_OPTS
533     if (s->flags & CODEC_FLAG_CBP_RD)
534         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
535 #endif
536
537     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
538         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
539         return -1;
540     }
541
542     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
543         s->avctx->mb_decision != FF_MB_DECISION_RD) {
544         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
545         return -1;
546     }
547
548     if (s->avctx->scenechange_threshold < 1000000000 &&
549         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
550         av_log(avctx, AV_LOG_ERROR,
551                "closed gop with scene change detection are not supported yet, "
552                "set threshold to 1000000000\n");
553         return -1;
554     }
555
556     if (s->flags & CODEC_FLAG_LOW_DELAY) {
557         if (s->codec_id != CODEC_ID_MPEG2VIDEO) {
558             av_log(avctx, AV_LOG_ERROR,
559                   "low delay forcing is only available for mpeg2\n");
560             return -1;
561         }
562         if (s->max_b_frames != 0) {
563             av_log(avctx, AV_LOG_ERROR,
564                    "b frames cannot be used with low delay\n");
565             return -1;
566         }
567     }
568
569     if (s->q_scale_type == 1) {
570         if (avctx->qmax > 12) {
571             av_log(avctx, AV_LOG_ERROR,
572                    "non linear quant only supports qmax <= 12 currently\n");
573             return -1;
574         }
575     }
576
577     if (s->avctx->thread_count > 1         &&
578         s->codec_id != CODEC_ID_MPEG4      &&
579         s->codec_id != CODEC_ID_MPEG1VIDEO &&
580         s->codec_id != CODEC_ID_MPEG2VIDEO &&
581         s->codec_id != CODEC_ID_MJPEG      &&
582         (s->codec_id != CODEC_ID_H263P)) {
583         av_log(avctx, AV_LOG_ERROR,
584                "multi threaded encoding not supported by codec\n");
585         return -1;
586     }
587
588     if (s->avctx->thread_count < 1) {
589         av_log(avctx, AV_LOG_ERROR,
590                "automatic thread number detection not supported by codec, "
591                "patch welcome\n");
592         return -1;
593     }
594
595     if (s->avctx->thread_count > 1)
596         s->rtp_mode = 1;
597
598     if (!avctx->time_base.den || !avctx->time_base.num) {
599         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
600         return -1;
601     }
602
603     i = (INT_MAX / 2 + 128) >> 8;
604     if (avctx->me_threshold >= i) {
605         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
606                i - 1);
607         return -1;
608     }
609     if (avctx->mb_threshold >= i) {
610         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
611                i - 1);
612         return -1;
613     }
614
615     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
616         av_log(avctx, AV_LOG_INFO,
617                "notice: b_frame_strategy only affects the first pass\n");
618         avctx->b_frame_strategy = 0;
619     }
620
621     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
622     if (i > 1) {
623         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
624         avctx->time_base.den /= i;
625         avctx->time_base.num /= i;
626         //return -1;
627     }
628
629     if (s->mpeg_quant || s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || s->codec_id == CODEC_ID_MJPEG || s->codec_id==CODEC_ID_AMV) {
630         // (a + x * 3 / 8) / x
631         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
632         s->inter_quant_bias = 0;
633     } else {
634         s->intra_quant_bias = 0;
635         // (a - x / 4) / x
636         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
637     }
638
639     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
640         s->intra_quant_bias = avctx->intra_quant_bias;
641     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
642         s->inter_quant_bias = avctx->inter_quant_bias;
643
644     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
645
646     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
647                                   &chroma_v_shift);
648
649     if (avctx->codec_id == CODEC_ID_MPEG4 &&
650         s->avctx->time_base.den > (1 << 16) - 1) {
651         av_log(avctx, AV_LOG_ERROR,
652                "timebase %d/%d not supported by MPEG 4 standard, "
653                "the maximum admitted value for the timebase denominator "
654                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
655                (1 << 16) - 1);
656         return -1;
657     }
658     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
659
660 #if FF_API_MPV_GLOBAL_OPTS
661     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
662         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
663     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
664         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
665     if (avctx->quantizer_noise_shaping)
666         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
667 #endif
668
669     switch (avctx->codec->id) {
670     case CODEC_ID_MPEG1VIDEO:
671         s->out_format = FMT_MPEG1;
672         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
673         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
674         break;
675     case CODEC_ID_MPEG2VIDEO:
676         s->out_format = FMT_MPEG1;
677         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
678         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
679         s->rtp_mode   = 1;
680         break;
681     case CODEC_ID_LJPEG:
682     case CODEC_ID_MJPEG:
683     case CODEC_ID_AMV:
684         s->out_format = FMT_MJPEG;
685         s->intra_only = 1; /* force intra only for jpeg */
686         if (avctx->codec->id == CODEC_ID_LJPEG &&
687             (avctx->pix_fmt == PIX_FMT_BGR0
688              || s->avctx->pix_fmt == PIX_FMT_BGRA
689              || s->avctx->pix_fmt == PIX_FMT_BGR24)) {
690             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
691             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
692             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
693         } else {
694             s->mjpeg_vsample[0] = 2;
695             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
696             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
697             s->mjpeg_hsample[0] = 2;
698             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
699             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
700         }
701         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
702             ff_mjpeg_encode_init(s) < 0)
703             return -1;
704         avctx->delay = 0;
705         s->low_delay = 1;
706         break;
707     case CODEC_ID_H261:
708         if (!CONFIG_H261_ENCODER)
709             return -1;
710         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
711             av_log(avctx, AV_LOG_ERROR,
712                    "The specified picture size of %dx%d is not valid for the "
713                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
714                     s->width, s->height);
715             return -1;
716         }
717         s->out_format = FMT_H261;
718         avctx->delay  = 0;
719         s->low_delay  = 1;
720         break;
721     case CODEC_ID_H263:
722         if (!CONFIG_H263_ENCODER)
723             return -1;
724         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
725                              s->width, s->height) == 8) {
726             av_log(avctx, AV_LOG_ERROR,
727                    "The specified picture size of %dx%d is not valid for "
728                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
729                    "352x288, 704x576, and 1408x1152. "
730                    "Try H.263+.\n", s->width, s->height);
731             return -1;
732         }
733         s->out_format = FMT_H263;
734         avctx->delay  = 0;
735         s->low_delay  = 1;
736         break;
737     case CODEC_ID_H263P:
738         s->out_format = FMT_H263;
739         s->h263_plus  = 1;
740         /* Fx */
741         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
742         s->modified_quant  = s->h263_aic;
743         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
744         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
745
746         /* /Fx */
747         /* These are just to be sure */
748         avctx->delay = 0;
749         s->low_delay = 1;
750         break;
751     case CODEC_ID_FLV1:
752         s->out_format      = FMT_H263;
753         s->h263_flv        = 2; /* format = 1; 11-bit codes */
754         s->unrestricted_mv = 1;
755         s->rtp_mode  = 0; /* don't allow GOB */
756         avctx->delay = 0;
757         s->low_delay = 1;
758         break;
759     case CODEC_ID_RV10:
760         s->out_format = FMT_H263;
761         avctx->delay  = 0;
762         s->low_delay  = 1;
763         break;
764     case CODEC_ID_RV20:
765         s->out_format      = FMT_H263;
766         avctx->delay       = 0;
767         s->low_delay       = 1;
768         s->modified_quant  = 1;
769         s->h263_aic        = 1;
770         s->h263_plus       = 1;
771         s->loop_filter     = 1;
772         s->unrestricted_mv = 0;
773         break;
774     case CODEC_ID_MPEG4:
775         s->out_format      = FMT_H263;
776         s->h263_pred       = 1;
777         s->unrestricted_mv = 1;
778         s->low_delay       = s->max_b_frames ? 0 : 1;
779         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
780         break;
781     case CODEC_ID_MSMPEG4V2:
782         s->out_format      = FMT_H263;
783         s->h263_pred       = 1;
784         s->unrestricted_mv = 1;
785         s->msmpeg4_version = 2;
786         avctx->delay       = 0;
787         s->low_delay       = 1;
788         break;
789     case CODEC_ID_MSMPEG4V3:
790         s->out_format        = FMT_H263;
791         s->h263_pred         = 1;
792         s->unrestricted_mv   = 1;
793         s->msmpeg4_version   = 3;
794         s->flipflop_rounding = 1;
795         avctx->delay         = 0;
796         s->low_delay         = 1;
797         break;
798     case CODEC_ID_WMV1:
799         s->out_format        = FMT_H263;
800         s->h263_pred         = 1;
801         s->unrestricted_mv   = 1;
802         s->msmpeg4_version   = 4;
803         s->flipflop_rounding = 1;
804         avctx->delay         = 0;
805         s->low_delay         = 1;
806         break;
807     case CODEC_ID_WMV2:
808         s->out_format        = FMT_H263;
809         s->h263_pred         = 1;
810         s->unrestricted_mv   = 1;
811         s->msmpeg4_version   = 5;
812         s->flipflop_rounding = 1;
813         avctx->delay         = 0;
814         s->low_delay         = 1;
815         break;
816     default:
817         return -1;
818     }
819
820     avctx->has_b_frames = !s->low_delay;
821
822     s->encoding = 1;
823
824     s->progressive_frame    =
825     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
826                                                 CODEC_FLAG_INTERLACED_ME) ||
827                                 s->alternate_scan);
828
829     /* init */
830     if (ff_MPV_common_init(s) < 0)
831         return -1;
832
833     if (!s->dct_quantize)
834         s->dct_quantize = ff_dct_quantize_c;
835     if (!s->denoise_dct)
836         s->denoise_dct  = denoise_dct_c;
837     s->fast_dct_quantize = s->dct_quantize;
838     if (avctx->trellis)
839         s->dct_quantize  = dct_quantize_trellis_c;
840
841     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
842         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
843
844     s->quant_precision = 5;
845
846     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
847     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
848
849     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
850         ff_h261_encode_init(s);
851     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
852         ff_h263_encode_init(s);
853     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
854         ff_msmpeg4_encode_init(s);
855     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
856         && s->out_format == FMT_MPEG1)
857         ff_mpeg1_encode_init(s);
858
859     /* init q matrix */
860     for (i = 0; i < 64; i++) {
861         int j = s->dsp.idct_permutation[i];
862         if (CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4 &&
863             s->mpeg_quant) {
864             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
865             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
866         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
867             s->intra_matrix[j] =
868             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
869         } else {
870             /* mpeg1/2 */
871             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
872             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
873         }
874         if (s->avctx->intra_matrix)
875             s->intra_matrix[j] = s->avctx->intra_matrix[i];
876         if (s->avctx->inter_matrix)
877             s->inter_matrix[j] = s->avctx->inter_matrix[i];
878     }
879
880     /* precompute matrix */
881     /* for mjpeg, we do include qscale in the matrix */
882     if (s->out_format != FMT_MJPEG) {
883         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
884                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
885                           31, 1);
886         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
887                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
888                           31, 0);
889     }
890
891     if (ff_rate_control_init(s) < 0)
892         return -1;
893
894     return 0;
895 }
896
897 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
898 {
899     MpegEncContext *s = avctx->priv_data;
900
901     ff_rate_control_uninit(s);
902
903     ff_MPV_common_end(s);
904     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
905         s->out_format == FMT_MJPEG)
906         ff_mjpeg_encode_close(s);
907
908     av_freep(&avctx->extradata);
909
910     return 0;
911 }
912
/**
 * Sum of absolute differences between a 16x16 block and a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance, in samples, between the starts of two rows of src
 * @return the sum of |sample - ref| over all 256 samples of the block
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff < 0 ? -diff : diff;
        }
    }

    return total;
}
926
927 static int get_intra_count(MpegEncContext *s, uint8_t *src,
928                            uint8_t *ref, int stride)
929 {
930     int x, y, w, h;
931     int acc = 0;
932
933     w = s->width  & ~15;
934     h = s->height & ~15;
935
936     for (y = 0; y < h; y += 16) {
937         for (x = 0; x < w; x += 16) {
938             int offset = x + y * stride;
939             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
940                                      16);
941             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
942             int sae  = get_sae(src + offset, mean, stride);
943
944             acc += sae + 500 < sad;
945         }
946     }
947     return acc;
948 }
949
950
951 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
952 {
953     AVFrame *pic = NULL;
954     int64_t pts;
955     int i;
956     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
957                                                  (s->low_delay ? 0 : 1);
958     int direct = 1;
959
960     if (pic_arg) {
961         pts = pic_arg->pts;
962         pic_arg->display_picture_number = s->input_picture_number++;
963
964         if (pts != AV_NOPTS_VALUE) {
965             if (s->user_specified_pts != AV_NOPTS_VALUE) {
966                 int64_t time = pts;
967                 int64_t last = s->user_specified_pts;
968
969                 if (time <= last) {
970                     av_log(s->avctx, AV_LOG_ERROR,
971                            "Error, Invalid timestamp=%"PRId64", "
972                            "last=%"PRId64"\n", pts, s->user_specified_pts);
973                     return -1;
974                 }
975
976                 if (!s->low_delay && pic_arg->display_picture_number == 1)
977                     s->dts_delta = time - last;
978             }
979             s->user_specified_pts = pts;
980         } else {
981             if (s->user_specified_pts != AV_NOPTS_VALUE) {
982                 s->user_specified_pts =
983                 pts = s->user_specified_pts + 1;
984                 av_log(s->avctx, AV_LOG_INFO,
985                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
986                        pts);
987             } else {
988                 pts = pic_arg->display_picture_number;
989             }
990         }
991     }
992
993   if (pic_arg) {
994     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
995         direct = 0;
996     if (pic_arg->linesize[0] != s->linesize)
997         direct = 0;
998     if (pic_arg->linesize[1] != s->uvlinesize)
999         direct = 0;
1000     if (pic_arg->linesize[2] != s->uvlinesize)
1001         direct = 0;
1002
1003     //av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0],
1004     //       pic_arg->linesize[1], s->linesize, s->uvlinesize);
1005
1006     if (direct) {
1007         i = ff_find_unused_picture(s, 1);
1008         if (i < 0)
1009             return i;
1010
1011         pic = &s->picture[i].f;
1012         pic->reference = 3;
1013
1014         for (i = 0; i < 4; i++) {
1015             pic->data[i]     = pic_arg->data[i];
1016             pic->linesize[i] = pic_arg->linesize[i];
1017         }
1018         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
1019             return -1;
1020         }
1021     } else {
1022         i = ff_find_unused_picture(s, 0);
1023         if (i < 0)
1024             return i;
1025
1026         pic = &s->picture[i].f;
1027         pic->reference = 3;
1028
1029         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
1030             return -1;
1031         }
1032
1033         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1034             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1035             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1036             // empty
1037         } else {
1038             int h_chroma_shift, v_chroma_shift;
1039             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
1040                                           &v_chroma_shift);
1041
1042             for (i = 0; i < 3; i++) {
1043                 int src_stride = pic_arg->linesize[i];
1044                 int dst_stride = i ? s->uvlinesize : s->linesize;
1045                 int h_shift = i ? h_chroma_shift : 0;
1046                 int v_shift = i ? v_chroma_shift : 0;
1047                 int w = s->width  >> h_shift;
1048                 int h = s->height >> v_shift;
1049                 uint8_t *src = pic_arg->data[i];
1050                 uint8_t *dst = pic->data[i];
1051
1052                 if(s->codec_id == CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)){
1053                     h= ((s->height+15)/16*16)>>v_shift;
1054                 }
1055
1056                 if (!s->avctx->rc_buffer_size)
1057                     dst += INPLACE_OFFSET;
1058
1059                 if (src_stride == dst_stride)
1060                     memcpy(dst, src, src_stride * h);
1061                 else {
1062                     while (h--) {
1063                         memcpy(dst, src, w);
1064                         dst += dst_stride;
1065                         src += src_stride;
1066                     }
1067                 }
1068             }
1069         }
1070     }
1071     copy_picture_attributes(s, pic, pic_arg);
1072     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1073   }
1074
1075     /* shift buffer entries */
1076     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1077         s->input_picture[i - 1] = s->input_picture[i];
1078
1079     s->input_picture[encoding_delay] = (Picture*) pic;
1080
1081     return 0;
1082 }
1083
1084 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1085 {
1086     int x, y, plane;
1087     int score = 0;
1088     int64_t score64 = 0;
1089
1090     for (plane = 0; plane < 3; plane++) {
1091         const int stride = p->f.linesize[plane];
1092         const int bw = plane ? 1 : 2;
1093         for (y = 0; y < s->mb_height * bw; y++) {
1094             for (x = 0; x < s->mb_width * bw; x++) {
1095                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1096                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1097                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1098                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1099
1100                 switch (s->avctx->frame_skip_exp) {
1101                 case 0: score    =  FFMAX(score, v);          break;
1102                 case 1: score   += FFABS(v);                  break;
1103                 case 2: score   += v * v;                     break;
1104                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1105                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1106                 }
1107             }
1108         }
1109     }
1110
1111     if (score)
1112         score64 = score;
1113
1114     if (score64 < s->avctx->frame_skip_threshold)
1115         return 1;
1116     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1117         return 1;
1118     return 0;
1119 }
1120
1121 static int estimate_best_b_count(MpegEncContext *s)
1122 {
1123     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1124     AVCodecContext *c = avcodec_alloc_context3(NULL);
1125     AVFrame input[FF_MAX_B_FRAMES + 2];
1126     const int scale = s->avctx->brd_scale;
1127     int i, j, out_size, p_lambda, b_lambda, lambda2;
1128     int outbuf_size  = s->width * s->height; // FIXME
1129     uint8_t *outbuf  = av_malloc(outbuf_size);
1130     int64_t best_rd  = INT64_MAX;
1131     int best_b_count = -1;
1132
1133     av_assert0(scale >= 0 && scale <= 3);
1134
1135     //emms_c();
1136     //s->next_picture_ptr->quality;
1137     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1138     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1139     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1140     if (!b_lambda) // FIXME we should do this somewhere else
1141         b_lambda = p_lambda;
1142     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1143                FF_LAMBDA_SHIFT;
1144
1145     c->width        = s->width  >> scale;
1146     c->height       = s->height >> scale;
1147     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1148                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1149     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1150     c->mb_decision  = s->avctx->mb_decision;
1151     c->me_cmp       = s->avctx->me_cmp;
1152     c->mb_cmp       = s->avctx->mb_cmp;
1153     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1154     c->pix_fmt      = PIX_FMT_YUV420P;
1155     c->time_base    = s->avctx->time_base;
1156     c->max_b_frames = s->max_b_frames;
1157
1158     if (avcodec_open2(c, codec, NULL) < 0)
1159         return -1;
1160
1161     for (i = 0; i < s->max_b_frames + 2; i++) {
1162         int ysize = c->width * c->height;
1163         int csize = (c->width / 2) * (c->height / 2);
1164         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1165                                                 s->next_picture_ptr;
1166
1167         avcodec_get_frame_defaults(&input[i]);
1168         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1169         input[i].data[1]     = input[i].data[0] + ysize;
1170         input[i].data[2]     = input[i].data[1] + csize;
1171         input[i].linesize[0] = c->width;
1172         input[i].linesize[1] =
1173         input[i].linesize[2] = c->width / 2;
1174
1175         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1176             pre_input = *pre_input_ptr;
1177
1178             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1179                 pre_input.f.data[0] += INPLACE_OFFSET;
1180                 pre_input.f.data[1] += INPLACE_OFFSET;
1181                 pre_input.f.data[2] += INPLACE_OFFSET;
1182             }
1183
1184             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1185                                  pre_input.f.data[0], pre_input.f.linesize[0],
1186                                  c->width,      c->height);
1187             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1188                                  pre_input.f.data[1], pre_input.f.linesize[1],
1189                                  c->width >> 1, c->height >> 1);
1190             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1191                                  pre_input.f.data[2], pre_input.f.linesize[2],
1192                                  c->width >> 1, c->height >> 1);
1193         }
1194     }
1195
1196     for (j = 0; j < s->max_b_frames + 1; j++) {
1197         int64_t rd = 0;
1198
1199         if (!s->input_picture[j])
1200             break;
1201
1202         c->error[0] = c->error[1] = c->error[2] = 0;
1203
1204         input[0].pict_type = AV_PICTURE_TYPE_I;
1205         input[0].quality   = 1 * FF_QP2LAMBDA;
1206         out_size           = avcodec_encode_video(c, outbuf,
1207                                                   outbuf_size, &input[0]);
1208         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1209
1210         for (i = 0; i < s->max_b_frames + 1; i++) {
1211             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1212
1213             input[i + 1].pict_type = is_p ?
1214                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1215             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1216             out_size = avcodec_encode_video(c, outbuf, outbuf_size,
1217                                             &input[i + 1]);
1218             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1219         }
1220
1221         /* get the delayed frames */
1222         while (out_size) {
1223             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
1224             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1225         }
1226
1227         rd += c->error[0] + c->error[1] + c->error[2];
1228
1229         if (rd < best_rd) {
1230             best_rd = rd;
1231             best_b_count = j;
1232         }
1233     }
1234
1235     av_freep(&outbuf);
1236     avcodec_close(c);
1237     av_freep(&c);
1238
1239     for (i = 0; i < s->max_b_frames + 2; i++) {
1240         av_freep(&input[i].data[0]);
1241     }
1242
1243     return best_b_count;
1244 }
1245
1246 static int select_input_picture(MpegEncContext *s)
1247 {
1248     int i;
1249
1250     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1251         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1252     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1253
1254     /* set next picture type & ordering */
1255     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1256         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1257             s->next_picture_ptr == NULL || s->intra_only) {
1258             s->reordered_input_picture[0] = s->input_picture[0];
1259             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1260             s->reordered_input_picture[0]->f.coded_picture_number =
1261                 s->coded_picture_number++;
1262         } else {
1263             int b_frames;
1264
1265             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1266                 if (s->picture_in_gop_number < s->gop_size &&
1267                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1268                     // FIXME check that te gop check above is +-1 correct
1269                     //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n",
1270                     //       s->input_picture[0]->f.data[0],
1271                     //       s->input_picture[0]->pts);
1272
1273                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1274                         for (i = 0; i < 4; i++)
1275                             s->input_picture[0]->f.data[i] = NULL;
1276                         s->input_picture[0]->f.type = 0;
1277                     } else {
1278                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1279                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1280
1281                         s->avctx->release_buffer(s->avctx,
1282                                                  &s->input_picture[0]->f);
1283                     }
1284
1285                     emms_c();
1286                     ff_vbv_update(s, 0);
1287
1288                     goto no_output_pic;
1289                 }
1290             }
1291
1292             if (s->flags & CODEC_FLAG_PASS2) {
1293                 for (i = 0; i < s->max_b_frames + 1; i++) {
1294                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1295
1296                     if (pict_num >= s->rc_context.num_entries)
1297                         break;
1298                     if (!s->input_picture[i]) {
1299                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1300                         break;
1301                     }
1302
1303                     s->input_picture[i]->f.pict_type =
1304                         s->rc_context.entry[pict_num].new_pict_type;
1305                 }
1306             }
1307
1308             if (s->avctx->b_frame_strategy == 0) {
1309                 b_frames = s->max_b_frames;
1310                 while (b_frames && !s->input_picture[b_frames])
1311                     b_frames--;
1312             } else if (s->avctx->b_frame_strategy == 1) {
1313                 for (i = 1; i < s->max_b_frames + 1; i++) {
1314                     if (s->input_picture[i] &&
1315                         s->input_picture[i]->b_frame_score == 0) {
1316                         s->input_picture[i]->b_frame_score =
1317                             get_intra_count(s,
1318                                             s->input_picture[i    ]->f.data[0],
1319                                             s->input_picture[i - 1]->f.data[0],
1320                                             s->linesize) + 1;
1321                     }
1322                 }
1323                 for (i = 0; i < s->max_b_frames + 1; i++) {
1324                     if (s->input_picture[i] == NULL ||
1325                         s->input_picture[i]->b_frame_score - 1 >
1326                             s->mb_num / s->avctx->b_sensitivity)
1327                         break;
1328                 }
1329
1330                 b_frames = FFMAX(0, i - 1);
1331
1332                 /* reset scores */
1333                 for (i = 0; i < b_frames + 1; i++) {
1334                     s->input_picture[i]->b_frame_score = 0;
1335                 }
1336             } else if (s->avctx->b_frame_strategy == 2) {
1337                 b_frames = estimate_best_b_count(s);
1338             } else {
1339                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1340                 b_frames = 0;
1341             }
1342
1343             emms_c();
1344             //static int b_count = 0;
1345             //b_count += b_frames;
1346             //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1347
1348             for (i = b_frames - 1; i >= 0; i--) {
1349                 int type = s->input_picture[i]->f.pict_type;
1350                 if (type && type != AV_PICTURE_TYPE_B)
1351                     b_frames = i;
1352             }
1353             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1354                 b_frames == s->max_b_frames) {
1355                 av_log(s->avctx, AV_LOG_ERROR,
1356                        "warning, too many b frames in a row\n");
1357             }
1358
1359             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1360                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1361                     s->gop_size > s->picture_in_gop_number) {
1362                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1363                 } else {
1364                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1365                         b_frames = 0;
1366                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1367                 }
1368             }
1369
1370             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1371                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1372                 b_frames--;
1373
1374             s->reordered_input_picture[0] = s->input_picture[b_frames];
1375             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1376                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1377             s->reordered_input_picture[0]->f.coded_picture_number =
1378                 s->coded_picture_number++;
1379             for (i = 0; i < b_frames; i++) {
1380                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1381                 s->reordered_input_picture[i + 1]->f.pict_type =
1382                     AV_PICTURE_TYPE_B;
1383                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1384                     s->coded_picture_number++;
1385             }
1386         }
1387     }
1388 no_output_pic:
1389     if (s->reordered_input_picture[0]) {
1390         s->reordered_input_picture[0]->f.reference =
1391            s->reordered_input_picture[0]->f.pict_type !=
1392                AV_PICTURE_TYPE_B ? 3 : 0;
1393
1394         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1395
1396         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1397             s->avctx->rc_buffer_size) {
1398             // input is a shared pix, so we can't modifiy it -> alloc a new
1399             // one & ensure that the shared one is reuseable
1400
1401             Picture *pic;
1402             int i = ff_find_unused_picture(s, 0);
1403             if (i < 0)
1404                 return i;
1405             pic = &s->picture[i];
1406
1407             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1408             if (ff_alloc_picture(s, pic, 0) < 0) {
1409                 return -1;
1410             }
1411
1412             /* mark us unused / free shared pic */
1413             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1414                 s->avctx->release_buffer(s->avctx,
1415                                          &s->reordered_input_picture[0]->f);
1416             for (i = 0; i < 4; i++)
1417                 s->reordered_input_picture[0]->f.data[i] = NULL;
1418             s->reordered_input_picture[0]->f.type = 0;
1419
1420             copy_picture_attributes(s, &pic->f,
1421                                     &s->reordered_input_picture[0]->f);
1422
1423             s->current_picture_ptr = pic;
1424         } else {
1425             // input is not a shared pix -> reuse buffer for current_pix
1426
1427             assert(s->reordered_input_picture[0]->f.type ==
1428                        FF_BUFFER_TYPE_USER ||
1429                    s->reordered_input_picture[0]->f.type ==
1430                        FF_BUFFER_TYPE_INTERNAL);
1431
1432             s->current_picture_ptr = s->reordered_input_picture[0];
1433             for (i = 0; i < 4; i++) {
1434                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1435             }
1436         }
1437         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1438
1439         s->picture_number = s->new_picture.f.display_picture_number;
1440         //printf("dpn:%d\n", s->picture_number);
1441     } else {
1442         memset(&s->new_picture, 0, sizeof(Picture));
1443     }
1444     return 0;
1445 }
1446
1447 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1448                           AVFrame *pic_arg, int *got_packet)
1449 {
1450     MpegEncContext *s = avctx->priv_data;
1451     int i, stuffing_count, ret;
1452     int context_count = s->slice_context_count;
1453
1454     s->picture_in_gop_number++;
1455
1456     if (load_input_picture(s, pic_arg) < 0)
1457         return -1;
1458
1459     if (select_input_picture(s) < 0) {
1460         return -1;
1461     }
1462
1463     /* output? */
1464     if (s->new_picture.f.data[0]) {
1465         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1466             return ret;
1467         if (s->mb_info) {
1468             s->mb_info_ptr = av_packet_new_side_data(pkt,
1469                                  AV_PKT_DATA_H263_MB_INFO,
1470                                  s->mb_width*s->mb_height*12);
1471             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1472         }
1473
1474         for (i = 0; i < context_count; i++) {
1475             int start_y = s->thread_context[i]->start_mb_y;
1476             int   end_y = s->thread_context[i]->  end_mb_y;
1477             int h       = s->mb_height;
1478             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1479             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1480
1481             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1482         }
1483
1484         s->pict_type = s->new_picture.f.pict_type;
1485         //emms_c();
1486         //printf("qs:%f %f %d\n", s->new_picture.quality,
1487         //       s->current_picture.quality, s->qscale);
1488         ff_MPV_frame_start(s, avctx);
1489 vbv_retry:
1490         if (encode_picture(s, s->picture_number) < 0)
1491             return -1;
1492
1493         avctx->header_bits = s->header_bits;
1494         avctx->mv_bits     = s->mv_bits;
1495         avctx->misc_bits   = s->misc_bits;
1496         avctx->i_tex_bits  = s->i_tex_bits;
1497         avctx->p_tex_bits  = s->p_tex_bits;
1498         avctx->i_count     = s->i_count;
1499         // FIXME f/b_count in avctx
1500         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1501         avctx->skip_count  = s->skip_count;
1502
1503         ff_MPV_frame_end(s);
1504
1505         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1506             ff_mjpeg_encode_picture_trailer(s);
1507
1508         if (avctx->rc_buffer_size) {
1509             RateControlContext *rcc = &s->rc_context;
1510             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1511
1512             if (put_bits_count(&s->pb) > max_size &&
1513                 s->lambda < s->avctx->lmax) {
1514                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1515                                        (s->qscale + 1) / s->qscale);
1516                 if (s->adaptive_quant) {
1517                     int i;
1518                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1519                         s->lambda_table[i] =
1520                             FFMAX(s->lambda_table[i] + 1,
1521                                   s->lambda_table[i] * (s->qscale + 1) /
1522                                   s->qscale);
1523                 }
1524                 s->mb_skipped = 0;        // done in MPV_frame_start()
1525                 // done in encode_picture() so we must undo it
1526                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1527                     if (s->flipflop_rounding          ||
1528                         s->codec_id == CODEC_ID_H263P ||
1529                         s->codec_id == CODEC_ID_MPEG4)
1530                         s->no_rounding ^= 1;
1531                 }
1532                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1533                     s->time_base       = s->last_time_base;
1534                     s->last_non_b_time = s->time - s->pp_time;
1535                 }
1536                 //av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
1537                 for (i = 0; i < context_count; i++) {
1538                     PutBitContext *pb = &s->thread_context[i]->pb;
1539                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1540                 }
1541                 goto vbv_retry;
1542             }
1543
1544             assert(s->avctx->rc_max_rate);
1545         }
1546
1547         if (s->flags & CODEC_FLAG_PASS1)
1548             ff_write_pass1_stats(s);
1549
1550         for (i = 0; i < 4; i++) {
1551             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1552             avctx->error[i] += s->current_picture_ptr->f.error[i];
1553         }
1554
1555         if (s->flags & CODEC_FLAG_PASS1)
1556             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1557                    avctx->i_tex_bits + avctx->p_tex_bits ==
1558                        put_bits_count(&s->pb));
1559         flush_put_bits(&s->pb);
1560         s->frame_bits  = put_bits_count(&s->pb);
1561
1562         stuffing_count = ff_vbv_update(s, s->frame_bits);
1563         if (stuffing_count) {
1564             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1565                     stuffing_count + 50) {
1566                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1567                 return -1;
1568             }
1569
1570             switch (s->codec_id) {
1571             case CODEC_ID_MPEG1VIDEO:
1572             case CODEC_ID_MPEG2VIDEO:
1573                 while (stuffing_count--) {
1574                     put_bits(&s->pb, 8, 0);
1575                 }
1576             break;
1577             case CODEC_ID_MPEG4:
1578                 put_bits(&s->pb, 16, 0);
1579                 put_bits(&s->pb, 16, 0x1C3);
1580                 stuffing_count -= 4;
1581                 while (stuffing_count--) {
1582                     put_bits(&s->pb, 8, 0xFF);
1583                 }
1584             break;
1585             default:
1586                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1587             }
1588             flush_put_bits(&s->pb);
1589             s->frame_bits  = put_bits_count(&s->pb);
1590         }
1591
1592         /* update mpeg1/2 vbv_delay for CBR */
1593         if (s->avctx->rc_max_rate                          &&
1594             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1595             s->out_format == FMT_MPEG1                     &&
1596             90000LL * (avctx->rc_buffer_size - 1) <=
1597                 s->avctx->rc_max_rate * 0xFFFFLL) {
1598             int vbv_delay, min_delay;
1599             double inbits  = s->avctx->rc_max_rate *
1600                              av_q2d(s->avctx->time_base);
1601             int    minbits = s->frame_bits - 8 *
1602                              (s->vbv_delay_ptr - s->pb.buf - 1);
1603             double bits    = s->rc_context.buffer_index + minbits - inbits;
1604
1605             if (bits < 0)
1606                 av_log(s->avctx, AV_LOG_ERROR,
1607                        "Internal error, negative bits\n");
1608
1609             assert(s->repeat_first_field == 0);
1610
1611             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1612             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1613                         s->avctx->rc_max_rate;
1614
1615             vbv_delay = FFMAX(vbv_delay, min_delay);
1616
1617             av_assert0(vbv_delay < 0xFFFF);
1618
1619             s->vbv_delay_ptr[0] &= 0xF8;
1620             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1621             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1622             s->vbv_delay_ptr[2] &= 0x07;
1623             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1624             avctx->vbv_delay     = vbv_delay * 300;
1625         }
1626         s->total_bits     += s->frame_bits;
1627         avctx->frame_bits  = s->frame_bits;
1628
1629         pkt->pts = s->current_picture.f.pts;
1630         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1631             if (!s->current_picture.f.coded_picture_number)
1632                 pkt->dts = pkt->pts - s->dts_delta;
1633             else
1634                 pkt->dts = s->reordered_pts;
1635             s->reordered_pts = pkt->pts;
1636         } else
1637             pkt->dts = pkt->pts;
1638         if (s->current_picture.f.key_frame)
1639             pkt->flags |= AV_PKT_FLAG_KEY;
1640         if (s->mb_info)
1641             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1642     } else {
1643         s->frame_bits = 0;
1644     }
1645     assert((s->frame_bits & 7) == 0);
1646
1647     pkt->size = s->frame_bits / 8;
1648     *got_packet = !!pkt->size;
1649     return 0;
1650 }
1651
1652 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1653                                                 int n, int threshold)
1654 {
1655     static const char tab[64] = {
1656         3, 2, 2, 1, 1, 1, 1, 1,
1657         1, 1, 1, 1, 1, 1, 1, 1,
1658         1, 1, 1, 1, 1, 1, 1, 1,
1659         0, 0, 0, 0, 0, 0, 0, 0,
1660         0, 0, 0, 0, 0, 0, 0, 0,
1661         0, 0, 0, 0, 0, 0, 0, 0,
1662         0, 0, 0, 0, 0, 0, 0, 0,
1663         0, 0, 0, 0, 0, 0, 0, 0
1664     };
1665     int score = 0;
1666     int run = 0;
1667     int i;
1668     DCTELEM *block = s->block[n];
1669     const int last_index = s->block_last_index[n];
1670     int skip_dc;
1671
1672     if (threshold < 0) {
1673         skip_dc = 0;
1674         threshold = -threshold;
1675     } else
1676         skip_dc = 1;
1677
1678     /* Are all we could set to zero already zero? */
1679     if (last_index <= skip_dc - 1)
1680         return;
1681
1682     for (i = 0; i <= last_index; i++) {
1683         const int j = s->intra_scantable.permutated[i];
1684         const int level = FFABS(block[j]);
1685         if (level == 1) {
1686             if (skip_dc && i == 0)
1687                 continue;
1688             score += tab[run];
1689             run = 0;
1690         } else if (level > 1) {
1691             return;
1692         } else {
1693             run++;
1694         }
1695     }
1696     if (score >= threshold)
1697         return;
1698     for (i = skip_dc; i <= last_index; i++) {
1699         const int j = s->intra_scantable.permutated[i];
1700         block[j] = 0;
1701     }
1702     if (block[0])
1703         s->block_last_index[n] = 0;
1704     else
1705         s->block_last_index[n] = -1;
1706 }
1707
1708 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1709                                int last_index)
1710 {
1711     int i;
1712     const int maxlevel = s->max_qcoeff;
1713     const int minlevel = s->min_qcoeff;
1714     int overflow = 0;
1715
1716     if (s->mb_intra) {
1717         i = 1; // skip clipping of intra dc
1718     } else
1719         i = 0;
1720
1721     for (; i <= last_index; i++) {
1722         const int j = s->intra_scantable.permutated[i];
1723         int level = block[j];
1724
1725         if (level > maxlevel) {
1726             level = maxlevel;
1727             overflow++;
1728         } else if (level < minlevel) {
1729             level = minlevel;
1730             overflow++;
1731         }
1732
1733         block[j] = level;
1734     }
1735
1736     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1737         av_log(s->avctx, AV_LOG_INFO,
1738                "warning, clipping %d dct coefficients to %d..%d\n",
1739                overflow, minlevel, maxlevel);
1740 }
1741
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For each pixel the neighbourhood of up to 3x3 samples (cropped at the
 * block border) is examined and the weight is set to
 * 36 * sqrt(count * sum_of_squares - sum^2) / count.
 *
 * @param weight output table, 64 entries in raster order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two pixel rows
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left    = FFMAX(col - 1, 0);
            const int right   = FFMIN(8, col + 2);
            /* number of samples inside the cropped window */
            const int samples = (bottom - top) * (right - left);
            int total   = 0;
            int squares = 0;
            int nx, ny;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int pix = ptr[nx + ny * stride];

                    total   += pix;
                    squares += pix * pix;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * squares - total * total)) / samples;
        }
    }
}
1765
1766 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1767                                                 int motion_x, int motion_y,
1768                                                 int mb_block_height,
1769                                                 int mb_block_count)
1770 {
1771     int16_t weight[8][64];
1772     DCTELEM orig[8][64];
1773     const int mb_x = s->mb_x;
1774     const int mb_y = s->mb_y;
1775     int i;
1776     int skip_dct[8];
1777     int dct_offset = s->linesize * 8; // default for progressive frames
1778     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1779     int wrap_y, wrap_c;
1780
1781     for (i = 0; i < mb_block_count; i++)
1782         skip_dct[i] = s->skipdct;
1783
1784     if (s->adaptive_quant) {
1785         const int last_qp = s->qscale;
1786         const int mb_xy = mb_x + mb_y * s->mb_stride;
1787
1788         s->lambda = s->lambda_table[mb_xy];
1789         update_qscale(s);
1790
1791         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1792             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1793             s->dquant = s->qscale - last_qp;
1794
1795             if (s->out_format == FMT_H263) {
1796                 s->dquant = av_clip(s->dquant, -2, 2);
1797
1798                 if (s->codec_id == CODEC_ID_MPEG4) {
1799                     if (!s->mb_intra) {
1800                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1801                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1802                                 s->dquant = 0;
1803                         }
1804                         if (s->mv_type == MV_TYPE_8X8)
1805                             s->dquant = 0;
1806                     }
1807                 }
1808             }
1809         }
1810         ff_set_qscale(s, last_qp + s->dquant);
1811     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1812         ff_set_qscale(s, s->qscale + s->dquant);
1813
1814     wrap_y = s->linesize;
1815     wrap_c = s->uvlinesize;
1816     ptr_y  = s->new_picture.f.data[0] +
1817              (mb_y * 16 * wrap_y)              + mb_x * 16;
1818     ptr_cb = s->new_picture.f.data[1] +
1819              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1820     ptr_cr = s->new_picture.f.data[2] +
1821              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1822
1823     if((mb_x*16+16 > s->width || mb_y*16+16 > s->height) && s->codec_id != CODEC_ID_AMV){
1824         uint8_t *ebuf = s->edge_emu_buffer + 32;
1825         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1826                                 mb_y * 16, s->width, s->height);
1827         ptr_y = ebuf;
1828         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1829                                 mb_block_height, mb_x * 8, mb_y * 8,
1830                                 (s->width+1) >> 1, (s->height+1) >> 1);
1831         ptr_cb = ebuf + 18 * wrap_y;
1832         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1833                                 mb_block_height, mb_x * 8, mb_y * 8,
1834                                 (s->width+1) >> 1, (s->height+1) >> 1);
1835         ptr_cr = ebuf + 18 * wrap_y + 8;
1836     }
1837
1838     if (s->mb_intra) {
1839         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1840             int progressive_score, interlaced_score;
1841
1842             s->interlaced_dct = 0;
1843             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1844                                                     NULL, wrap_y, 8) +
1845                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1846                                                     NULL, wrap_y, 8) - 400;
1847
1848             if (progressive_score > 0) {
1849                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1850                                                        NULL, wrap_y * 2, 8) +
1851                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1852                                                        NULL, wrap_y * 2, 8);
1853                 if (progressive_score > interlaced_score) {
1854                     s->interlaced_dct = 1;
1855
1856                     dct_offset = wrap_y;
1857                     wrap_y <<= 1;
1858                     if (s->chroma_format == CHROMA_422)
1859                         wrap_c <<= 1;
1860                 }
1861             }
1862         }
1863
1864         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1865         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1866         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1867         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1868
1869         if (s->flags & CODEC_FLAG_GRAY) {
1870             skip_dct[4] = 1;
1871             skip_dct[5] = 1;
1872         } else {
1873             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1874             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1875             if (!s->chroma_y_shift) { /* 422 */
1876                 s->dsp.get_pixels(s->block[6],
1877                                   ptr_cb + (dct_offset >> 1), wrap_c);
1878                 s->dsp.get_pixels(s->block[7],
1879                                   ptr_cr + (dct_offset >> 1), wrap_c);
1880             }
1881         }
1882     } else {
1883         op_pixels_func (*op_pix)[4];
1884         qpel_mc_func (*op_qpix)[16];
1885         uint8_t *dest_y, *dest_cb, *dest_cr;
1886
1887         dest_y  = s->dest[0];
1888         dest_cb = s->dest[1];
1889         dest_cr = s->dest[2];
1890
1891         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1892             op_pix  = s->dsp.put_pixels_tab;
1893             op_qpix = s->dsp.put_qpel_pixels_tab;
1894         } else {
1895             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1896             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1897         }
1898
1899         if (s->mv_dir & MV_DIR_FORWARD) {
1900             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f.data,
1901                        op_pix, op_qpix);
1902             op_pix  = s->dsp.avg_pixels_tab;
1903             op_qpix = s->dsp.avg_qpel_pixels_tab;
1904         }
1905         if (s->mv_dir & MV_DIR_BACKWARD) {
1906             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f.data,
1907                        op_pix, op_qpix);
1908         }
1909
1910         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1911             int progressive_score, interlaced_score;
1912
1913             s->interlaced_dct = 0;
1914             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1915                                                     ptr_y,              wrap_y,
1916                                                     8) +
1917                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1918                                                     ptr_y + wrap_y * 8, wrap_y,
1919                                                     8) - 400;
1920
1921             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1922                 progressive_score -= 400;
1923
1924             if (progressive_score > 0) {
1925                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1926                                                        ptr_y,
1927                                                        wrap_y * 2, 8) +
1928                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1929                                                        ptr_y + wrap_y,
1930                                                        wrap_y * 2, 8);
1931
1932                 if (progressive_score > interlaced_score) {
1933                     s->interlaced_dct = 1;
1934
1935                     dct_offset = wrap_y;
1936                     wrap_y <<= 1;
1937                     if (s->chroma_format == CHROMA_422)
1938                         wrap_c <<= 1;
1939                 }
1940             }
1941         }
1942
1943         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1944         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1945         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1946                            dest_y + dct_offset, wrap_y);
1947         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1948                            dest_y + dct_offset + 8, wrap_y);
1949
1950         if (s->flags & CODEC_FLAG_GRAY) {
1951             skip_dct[4] = 1;
1952             skip_dct[5] = 1;
1953         } else {
1954             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1955             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1956             if (!s->chroma_y_shift) { /* 422 */
1957                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1958                                    dest_cb + (dct_offset >> 1), wrap_c);
1959                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1960                                    dest_cr + (dct_offset >> 1), wrap_c);
1961             }
1962         }
1963         /* pre quantization */
1964         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1965                 2 * s->qscale * s->qscale) {
1966             // FIXME optimize
1967             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1968                               wrap_y, 8) < 20 * s->qscale)
1969                 skip_dct[0] = 1;
1970             if (s->dsp.sad[1](NULL, ptr_y + 8,
1971                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1972                 skip_dct[1] = 1;
1973             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1974                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1975                 skip_dct[2] = 1;
1976             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1977                               dest_y + dct_offset + 8,
1978                               wrap_y, 8) < 20 * s->qscale)
1979                 skip_dct[3] = 1;
1980             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1981                               wrap_c, 8) < 20 * s->qscale)
1982                 skip_dct[4] = 1;
1983             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1984                               wrap_c, 8) < 20 * s->qscale)
1985                 skip_dct[5] = 1;
1986             if (!s->chroma_y_shift) { /* 422 */
1987                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1988                                   dest_cb + (dct_offset >> 1),
1989                                   wrap_c, 8) < 20 * s->qscale)
1990                     skip_dct[6] = 1;
1991                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1992                                   dest_cr + (dct_offset >> 1),
1993                                   wrap_c, 8) < 20 * s->qscale)
1994                     skip_dct[7] = 1;
1995             }
1996         }
1997     }
1998
1999     if (s->quantizer_noise_shaping) {
2000         if (!skip_dct[0])
2001             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2002         if (!skip_dct[1])
2003             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2004         if (!skip_dct[2])
2005             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2006         if (!skip_dct[3])
2007             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2008         if (!skip_dct[4])
2009             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2010         if (!skip_dct[5])
2011             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2012         if (!s->chroma_y_shift) { /* 422 */
2013             if (!skip_dct[6])
2014                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2015                                   wrap_c);
2016             if (!skip_dct[7])
2017                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2018                                   wrap_c);
2019         }
2020         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
2021     }
2022
2023     /* DCT & quantize */
2024     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2025     {
2026         for (i = 0; i < mb_block_count; i++) {
2027             if (!skip_dct[i]) {
2028                 int overflow;
2029                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2030                 // FIXME we could decide to change to quantizer instead of
2031                 // clipping
2032                 // JS: I don't think that would be a good idea it could lower
2033                 //     quality instead of improve it. Just INTRADC clipping
2034                 //     deserves changes in quantizer
2035                 if (overflow)
2036                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2037             } else
2038                 s->block_last_index[i] = -1;
2039         }
2040         if (s->quantizer_noise_shaping) {
2041             for (i = 0; i < mb_block_count; i++) {
2042                 if (!skip_dct[i]) {
2043                     s->block_last_index[i] =
2044                         dct_quantize_refine(s, s->block[i], weight[i],
2045                                             orig[i], i, s->qscale);
2046                 }
2047             }
2048         }
2049
2050         if (s->luma_elim_threshold && !s->mb_intra)
2051             for (i = 0; i < 4; i++)
2052                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2053         if (s->chroma_elim_threshold && !s->mb_intra)
2054             for (i = 4; i < mb_block_count; i++)
2055                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2056
2057         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2058             for (i = 0; i < mb_block_count; i++) {
2059                 if (s->block_last_index[i] == -1)
2060                     s->coded_score[i] = INT_MAX / 256;
2061             }
2062         }
2063     }
2064
2065     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2066         s->block_last_index[4] =
2067         s->block_last_index[5] = 0;
2068         s->block[4][0] =
2069         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2070     }
2071
2072     // non c quantize code returns incorrect block_last_index FIXME
2073     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2074         for (i = 0; i < mb_block_count; i++) {
2075             int j;
2076             if (s->block_last_index[i] > 0) {
2077                 for (j = 63; j > 0; j--) {
2078                     if (s->block[i][s->intra_scantable.permutated[j]])
2079                         break;
2080                 }
2081                 s->block_last_index[i] = j;
2082             }
2083         }
2084     }
2085
2086     /* huffman encode */
2087     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2088     case CODEC_ID_MPEG1VIDEO:
2089     case CODEC_ID_MPEG2VIDEO:
2090         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2091             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2092         break;
2093     case CODEC_ID_MPEG4:
2094         if (CONFIG_MPEG4_ENCODER)
2095             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2096         break;
2097     case CODEC_ID_MSMPEG4V2:
2098     case CODEC_ID_MSMPEG4V3:
2099     case CODEC_ID_WMV1:
2100         if (CONFIG_MSMPEG4_ENCODER)
2101             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2102         break;
2103     case CODEC_ID_WMV2:
2104         if (CONFIG_WMV2_ENCODER)
2105             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2106         break;
2107     case CODEC_ID_H261:
2108         if (CONFIG_H261_ENCODER)
2109             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2110         break;
2111     case CODEC_ID_H263:
2112     case CODEC_ID_H263P:
2113     case CODEC_ID_FLV1:
2114     case CODEC_ID_RV10:
2115     case CODEC_ID_RV20:
2116         if (CONFIG_H263_ENCODER)
2117             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2118         break;
2119     case CODEC_ID_MJPEG:
2120     case CODEC_ID_AMV:
2121         if (CONFIG_MJPEG_ENCODER)
2122             ff_mjpeg_encode_mb(s, s->block);
2123         break;
2124     default:
2125         av_assert1(0);
2126     }
2127 }
2128
2129 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2130 {
2131     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2132     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2133 }
2134
2135 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2136     int i;
2137
2138     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2139
2140     /* mpeg1 */
2141     d->mb_skip_run= s->mb_skip_run;
2142     for(i=0; i<3; i++)
2143         d->last_dc[i] = s->last_dc[i];
2144
2145     /* statistics */
2146     d->mv_bits= s->mv_bits;
2147     d->i_tex_bits= s->i_tex_bits;
2148     d->p_tex_bits= s->p_tex_bits;
2149     d->i_count= s->i_count;
2150     d->f_count= s->f_count;
2151     d->b_count= s->b_count;
2152     d->skip_count= s->skip_count;
2153     d->misc_bits= s->misc_bits;
2154     d->last_bits= 0;
2155
2156     d->mb_skipped= 0;
2157     d->qscale= s->qscale;
2158     d->dquant= s->dquant;
2159
2160     d->esc3_level_length= s->esc3_level_length;
2161 }
2162
2163 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2164     int i;
2165
2166     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2167     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2168
2169     /* mpeg1 */
2170     d->mb_skip_run= s->mb_skip_run;
2171     for(i=0; i<3; i++)
2172         d->last_dc[i] = s->last_dc[i];
2173
2174     /* statistics */
2175     d->mv_bits= s->mv_bits;
2176     d->i_tex_bits= s->i_tex_bits;
2177     d->p_tex_bits= s->p_tex_bits;
2178     d->i_count= s->i_count;
2179     d->f_count= s->f_count;
2180     d->b_count= s->b_count;
2181     d->skip_count= s->skip_count;
2182     d->misc_bits= s->misc_bits;
2183
2184     d->mb_intra= s->mb_intra;
2185     d->mb_skipped= s->mb_skipped;
2186     d->mv_type= s->mv_type;
2187     d->mv_dir= s->mv_dir;
2188     d->pb= s->pb;
2189     if(s->data_partitioning){
2190         d->pb2= s->pb2;
2191         d->tex_pb= s->tex_pb;
2192     }
2193     d->block= s->block;
2194     for(i=0; i<8; i++)
2195         d->block_last_index[i]= s->block_last_index[i];
2196     d->interlaced_dct= s->interlaced_dct;
2197     d->qscale= s->qscale;
2198
2199     d->esc3_level_length= s->esc3_level_length;
2200 }
2201
2202 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2203                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2204                            int *dmin, int *next_block, int motion_x, int motion_y)
2205 {
2206     int score;
2207     uint8_t *dest_backup[3];
2208
2209     copy_context_before_encode(s, backup, type);
2210
2211     s->block= s->blocks[*next_block];
2212     s->pb= pb[*next_block];
2213     if(s->data_partitioning){
2214         s->pb2   = pb2   [*next_block];
2215         s->tex_pb= tex_pb[*next_block];
2216     }
2217
2218     if(*next_block){
2219         memcpy(dest_backup, s->dest, sizeof(s->dest));
2220         s->dest[0] = s->rd_scratchpad;
2221         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2222         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2223         assert(s->linesize >= 32); //FIXME
2224     }
2225
2226     encode_mb(s, motion_x, motion_y);
2227
2228     score= put_bits_count(&s->pb);
2229     if(s->data_partitioning){
2230         score+= put_bits_count(&s->pb2);
2231         score+= put_bits_count(&s->tex_pb);
2232     }
2233
2234     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2235         ff_MPV_decode_mb(s, s->block);
2236
2237         score *= s->lambda2;
2238         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2239     }
2240
2241     if(*next_block){
2242         memcpy(s->dest, dest_backup, sizeof(s->dest));
2243     }
2244
2245     if(score<*dmin){
2246         *dmin= score;
2247         *next_block^=1;
2248
2249         copy_context_after_encode(best, s, type);
2250     }
2251 }
2252
2253 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2254     uint32_t *sq = ff_squareTbl + 256;
2255     int acc=0;
2256     int x,y;
2257
2258     if(w==16 && h==16)
2259         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2260     else if(w==8 && h==8)
2261         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2262
2263     for(y=0; y<h; y++){
2264         for(x=0; x<w; x++){
2265             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2266         }
2267     }
2268
2269     av_assert2(acc>=0);
2270
2271     return acc;
2272 }
2273
2274 static int sse_mb(MpegEncContext *s){
2275     int w= 16;
2276     int h= 16;
2277
2278     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2279     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2280
2281     if(w==16 && h==16)
2282       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2283         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2284                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2285                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2286       }else{
2287         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2288                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2289                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2290       }
2291     else
2292         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2293                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2294                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2295 }
2296
2297 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2298     MpegEncContext *s= *(void**)arg;
2299
2300
2301     s->me.pre_pass=1;
2302     s->me.dia_size= s->avctx->pre_dia_size;
2303     s->first_slice_line=1;
2304     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2305         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2306             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2307         }
2308         s->first_slice_line=0;
2309     }
2310
2311     s->me.pre_pass=0;
2312
2313     return 0;
2314 }
2315
2316 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2317     MpegEncContext *s= *(void**)arg;
2318
2319     ff_check_alignment();
2320
2321     s->me.dia_size= s->avctx->dia_size;
2322     s->first_slice_line=1;
2323     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2324         s->mb_x=0; //for block init below
2325         ff_init_block_index(s);
2326         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2327             s->block_index[0]+=2;
2328             s->block_index[1]+=2;
2329             s->block_index[2]+=2;
2330             s->block_index[3]+=2;
2331
2332             /* compute motion vector & mb_type and store in context */
2333             if(s->pict_type==AV_PICTURE_TYPE_B)
2334                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2335             else
2336                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2337         }
2338         s->first_slice_line=0;
2339     }
2340     return 0;
2341 }
2342
2343 static int mb_var_thread(AVCodecContext *c, void *arg){
2344     MpegEncContext *s= *(void**)arg;
2345     int mb_x, mb_y;
2346
2347     ff_check_alignment();
2348
2349     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2350         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2351             int xx = mb_x * 16;
2352             int yy = mb_y * 16;
2353             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2354             int varc;
2355             int sum = s->dsp.pix_sum(pix, s->linesize);
2356
2357             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2358
2359             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2360             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2361             s->me.mb_var_sum_temp    += varc;
2362         }
2363     }
2364     return 0;
2365 }
2366
2367 static void write_slice_end(MpegEncContext *s){
2368     if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4){
2369         if(s->partitioned_frame){
2370             ff_mpeg4_merge_partitions(s);
2371         }
2372
2373         ff_mpeg4_stuffing(&s->pb);
2374     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2375         ff_mjpeg_encode_stuffing(s);
2376     }
2377
2378     avpriv_align_put_bits(&s->pb);
2379     flush_put_bits(&s->pb);
2380
2381     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2382         s->misc_bits+= get_bits_diff(s);
2383 }
2384
2385 static void write_mb_info(MpegEncContext *s)
2386 {
2387     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2388     int offset = put_bits_count(&s->pb);
2389     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2390     int gobn = s->mb_y / s->gob_index;
2391     int pred_x, pred_y;
2392     if (CONFIG_H263_ENCODER)
2393         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2394     bytestream_put_le32(&ptr, offset);
2395     bytestream_put_byte(&ptr, s->qscale);
2396     bytestream_put_byte(&ptr, gobn);
2397     bytestream_put_le16(&ptr, mba);
2398     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2399     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2400     /* 4MV not implemented */
2401     bytestream_put_byte(&ptr, 0); /* hmv2 */
2402     bytestream_put_byte(&ptr, 0); /* vmv2 */
2403 }
2404
2405 static void update_mb_info(MpegEncContext *s, int startcode)
2406 {
2407     if (!s->mb_info)
2408         return;
2409     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2410         s->mb_info_size += 12;
2411         s->prev_mb_info = s->last_mb_info;
2412     }
2413     if (startcode) {
2414         s->prev_mb_info = put_bits_count(&s->pb)/8;
2415         /* This might have incremented mb_info_size above, and we return without
2416          * actually writing any info into that slot yet. But in that case,
2417          * this will be called again at the start of the after writing the
2418          * start code, actually writing the mb info. */
2419         return;
2420     }
2421
2422     s->last_mb_info = put_bits_count(&s->pb)/8;
2423     if (!s->mb_info_size)
2424         s->mb_info_size += 12;
2425     write_mb_info(s);
2426 }
2427
2428 static int encode_thread(AVCodecContext *c, void *arg){
2429     MpegEncContext *s= *(void**)arg;
2430     int mb_x, mb_y, pdif = 0;
2431     int chr_h= 16>>s->chroma_y_shift;
2432     int i, j;
2433     MpegEncContext best_s, backup_s;
2434     uint8_t bit_buf[2][MAX_MB_BYTES];
2435     uint8_t bit_buf2[2][MAX_MB_BYTES];
2436     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2437     PutBitContext pb[2], pb2[2], tex_pb[2];
2438 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2439
2440     ff_check_alignment();
2441
2442     for(i=0; i<2; i++){
2443         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2444         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2445         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2446     }
2447
2448     s->last_bits= put_bits_count(&s->pb);
2449     s->mv_bits=0;
2450     s->misc_bits=0;
2451     s->i_tex_bits=0;
2452     s->p_tex_bits=0;
2453     s->i_count=0;
2454     s->f_count=0;
2455     s->b_count=0;
2456     s->skip_count=0;
2457
2458     for(i=0; i<3; i++){
2459         /* init last dc values */
2460         /* note: quant matrix value (8) is implied here */
2461         s->last_dc[i] = 128 << s->intra_dc_precision;
2462
2463         s->current_picture.f.error[i] = 0;
2464     }
2465     if(s->codec_id==CODEC_ID_AMV){
2466         s->last_dc[0] = 128*8/13;
2467         s->last_dc[1] = 128*8/14;
2468         s->last_dc[2] = 128*8/14;
2469     }
2470     s->mb_skip_run = 0;
2471     memset(s->last_mv, 0, sizeof(s->last_mv));
2472
2473     s->last_mv_dir = 0;
2474
2475     switch(s->codec_id){
2476     case CODEC_ID_H263:
2477     case CODEC_ID_H263P:
2478     case CODEC_ID_FLV1:
2479         if (CONFIG_H263_ENCODER)
2480             s->gob_index = ff_h263_get_gob_height(s);
2481         break;
2482     case CODEC_ID_MPEG4:
2483         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2484             ff_mpeg4_init_partitions(s);
2485         break;
2486     }
2487
2488     s->resync_mb_x=0;
2489     s->resync_mb_y=0;
2490     s->first_slice_line = 1;
2491     s->ptr_lastgob = s->pb.buf;
2492     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2493 //    printf("row %d at %X\n", s->mb_y, (int)s);
2494         s->mb_x=0;
2495         s->mb_y= mb_y;
2496
2497         ff_set_qscale(s, s->qscale);
2498         ff_init_block_index(s);
2499
2500         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2501             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2502             int mb_type= s->mb_type[xy];
2503 //            int d;
2504             int dmin= INT_MAX;
2505             int dir;
2506
2507             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2508                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2509                 return -1;
2510             }
2511             if(s->data_partitioning){
2512                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2513                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2514                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2515                     return -1;
2516                 }
2517             }
2518
2519             s->mb_x = mb_x;
2520             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2521             ff_update_block_index(s);
2522
2523             if(CONFIG_H261_ENCODER && s->codec_id == CODEC_ID_H261){
2524                 ff_h261_reorder_mb_index(s);
2525                 xy= s->mb_y*s->mb_stride + s->mb_x;
2526                 mb_type= s->mb_type[xy];
2527             }
2528
2529             /* write gob / video packet header  */
2530             if(s->rtp_mode){
2531                 int current_packet_size, is_gob_start;
2532
2533                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2534
2535                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2536
2537                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2538
2539                 switch(s->codec_id){
2540                 case CODEC_ID_H263:
2541                 case CODEC_ID_H263P:
2542                     if(!s->h263_slice_structured)
2543                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2544                     break;
2545                 case CODEC_ID_MPEG2VIDEO:
2546                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2547                 case CODEC_ID_MPEG1VIDEO:
2548                     if(s->mb_skip_run) is_gob_start=0;
2549                     break;
2550                 case CODEC_ID_MJPEG:
2551                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2552                     break;
2553                 }
2554
2555                 if(is_gob_start){
2556                     if(s->start_mb_y != mb_y || mb_x!=0){
2557                         write_slice_end(s);
2558                         if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
2559                             ff_mpeg4_init_partitions(s);
2560                         }
2561                     }
2562
2563                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2564                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2565
2566                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2567                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2568                         int d= 100 / s->avctx->error_rate;
2569                         if(r % d == 0){
2570                             current_packet_size=0;
2571                             s->pb.buf_ptr= s->ptr_lastgob;
2572                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2573                         }
2574                     }
2575
2576                     if (s->avctx->rtp_callback){
2577                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2578                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2579                     }
2580                     update_mb_info(s, 1);
2581
2582                     switch(s->codec_id){
2583                     case CODEC_ID_MPEG4:
2584                         if (CONFIG_MPEG4_ENCODER) {
2585                             ff_mpeg4_encode_video_packet_header(s);
2586                             ff_mpeg4_clean_buffers(s);
2587                         }
2588                     break;
2589                     case CODEC_ID_MPEG1VIDEO:
2590                     case CODEC_ID_MPEG2VIDEO:
2591                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2592                             ff_mpeg1_encode_slice_header(s);
2593                             ff_mpeg1_clean_buffers(s);
2594                         }
2595                     break;
2596                     case CODEC_ID_H263:
2597                     case CODEC_ID_H263P:
2598                         if (CONFIG_H263_ENCODER)
2599                             ff_h263_encode_gob_header(s, mb_y);
2600                     break;
2601                     }
2602
2603                     if(s->flags&CODEC_FLAG_PASS1){
2604                         int bits= put_bits_count(&s->pb);
2605                         s->misc_bits+= bits - s->last_bits;
2606                         s->last_bits= bits;
2607                     }
2608
2609                     s->ptr_lastgob += current_packet_size;
2610                     s->first_slice_line=1;
2611                     s->resync_mb_x=mb_x;
2612                     s->resync_mb_y=mb_y;
2613                 }
2614             }
2615
2616             if(  (s->resync_mb_x   == s->mb_x)
2617                && s->resync_mb_y+1 == s->mb_y){
2618                 s->first_slice_line=0;
2619             }
2620
2621             s->mb_skipped=0;
2622             s->dquant=0; //only for QP_RD
2623
2624             update_mb_info(s, 0);
2625
2626             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2627                 int next_block=0;
2628                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2629
2630                 copy_context_before_encode(&backup_s, s, -1);
2631                 backup_s.pb= s->pb;
2632                 best_s.data_partitioning= s->data_partitioning;
2633                 best_s.partitioned_frame= s->partitioned_frame;
2634                 if(s->data_partitioning){
2635                     backup_s.pb2= s->pb2;
2636                     backup_s.tex_pb= s->tex_pb;
2637                 }
2638
2639                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2640                     s->mv_dir = MV_DIR_FORWARD;
2641                     s->mv_type = MV_TYPE_16X16;
2642                     s->mb_intra= 0;
2643                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2644                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2645                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2646                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2647                 }
2648                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2649                     s->mv_dir = MV_DIR_FORWARD;
2650                     s->mv_type = MV_TYPE_FIELD;
2651                     s->mb_intra= 0;
2652                     for(i=0; i<2; i++){
2653                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2654                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2655                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2656                     }
2657                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2658                                  &dmin, &next_block, 0, 0);
2659                 }
2660                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2661                     s->mv_dir = MV_DIR_FORWARD;
2662                     s->mv_type = MV_TYPE_16X16;
2663                     s->mb_intra= 0;
2664                     s->mv[0][0][0] = 0;
2665                     s->mv[0][0][1] = 0;
2666                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2667                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2668                 }
2669                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2670                     s->mv_dir = MV_DIR_FORWARD;
2671                     s->mv_type = MV_TYPE_8X8;
2672                     s->mb_intra= 0;
2673                     for(i=0; i<4; i++){
2674                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2675                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2676                     }
2677                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2678                                  &dmin, &next_block, 0, 0);
2679                 }
2680                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2681                     s->mv_dir = MV_DIR_FORWARD;
2682                     s->mv_type = MV_TYPE_16X16;
2683                     s->mb_intra= 0;
2684                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2685                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2686                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2687                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2688                 }
2689                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2690                     s->mv_dir = MV_DIR_BACKWARD;
2691                     s->mv_type = MV_TYPE_16X16;
2692                     s->mb_intra= 0;
2693                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2694                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2695                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2696                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2697                 }
2698                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2699                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2700                     s->mv_type = MV_TYPE_16X16;
2701                     s->mb_intra= 0;
2702                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2703                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2704                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2705                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2706                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2707                                  &dmin, &next_block, 0, 0);
2708                 }
2709                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2710                     s->mv_dir = MV_DIR_FORWARD;
2711                     s->mv_type = MV_TYPE_FIELD;
2712                     s->mb_intra= 0;
2713                     for(i=0; i<2; i++){
2714                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2715                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2716                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2717                     }
2718                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2719                                  &dmin, &next_block, 0, 0);
2720                 }
2721                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2722                     s->mv_dir = MV_DIR_BACKWARD;
2723                     s->mv_type = MV_TYPE_FIELD;
2724                     s->mb_intra= 0;
2725                     for(i=0; i<2; i++){
2726                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2727                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2728                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2729                     }
2730                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2731                                  &dmin, &next_block, 0, 0);
2732                 }
2733                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2734                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2735                     s->mv_type = MV_TYPE_FIELD;
2736                     s->mb_intra= 0;
2737                     for(dir=0; dir<2; dir++){
2738                         for(i=0; i<2; i++){
2739                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2740                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2741                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2742                         }
2743                     }
2744                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2745                                  &dmin, &next_block, 0, 0);
2746                 }
2747                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2748                     s->mv_dir = 0;
2749                     s->mv_type = MV_TYPE_16X16;
2750                     s->mb_intra= 1;
2751                     s->mv[0][0][0] = 0;
2752                     s->mv[0][0][1] = 0;
2753                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2754                                  &dmin, &next_block, 0, 0);
2755                     if(s->h263_pred || s->h263_aic){
2756                         if(best_s.mb_intra)
2757                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2758                         else
2759                             ff_clean_intra_table_entries(s); //old mode?
2760                     }
2761                 }
2762
2763                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2764                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2765                         const int last_qp= backup_s.qscale;
2766                         int qpi, qp, dc[6];
2767                         DCTELEM ac[6][16];
2768                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2769                         static const int dquant_tab[4]={-1,1,-2,2};
2770
2771                         av_assert2(backup_s.dquant == 0);
2772
2773                         //FIXME intra
2774                         s->mv_dir= best_s.mv_dir;
2775                         s->mv_type = MV_TYPE_16X16;
2776                         s->mb_intra= best_s.mb_intra;
2777                         s->mv[0][0][0] = best_s.mv[0][0][0];
2778                         s->mv[0][0][1] = best_s.mv[0][0][1];
2779                         s->mv[1][0][0] = best_s.mv[1][0][0];
2780                         s->mv[1][0][1] = best_s.mv[1][0][1];
2781
2782                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2783                         for(; qpi<4; qpi++){
2784                             int dquant= dquant_tab[qpi];
2785                             qp= last_qp + dquant;
2786                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2787                                 continue;
2788                             backup_s.dquant= dquant;
2789                             if(s->mb_intra && s->dc_val[0]){
2790                                 for(i=0; i<6; i++){
2791                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2792                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2793                                 }
2794                             }
2795
2796                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2797                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2798                             if(best_s.qscale != qp){
2799                                 if(s->mb_intra && s->dc_val[0]){
2800                                     for(i=0; i<6; i++){
2801                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2802                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2803                                     }
2804                                 }
2805                             }
2806                         }
2807                     }
2808                 }
2809                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2810                     int mx= s->b_direct_mv_table[xy][0];
2811                     int my= s->b_direct_mv_table[xy][1];
2812
2813                     backup_s.dquant = 0;
2814                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2815                     s->mb_intra= 0;
2816                     ff_mpeg4_set_direct_mv(s, mx, my);
2817                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2818                                  &dmin, &next_block, mx, my);
2819                 }
2820                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2821                     backup_s.dquant = 0;
2822                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2823                     s->mb_intra= 0;
2824                     ff_mpeg4_set_direct_mv(s, 0, 0);
2825                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2826                                  &dmin, &next_block, 0, 0);
2827                 }
2828                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2829                     int coded=0;
2830                     for(i=0; i<6; i++)
2831                         coded |= s->block_last_index[i];
2832                     if(coded){
2833                         int mx,my;
2834                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2835                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2836                             mx=my=0; //FIXME find the one we actually used
2837                             ff_mpeg4_set_direct_mv(s, mx, my);
2838                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2839                             mx= s->mv[1][0][0];
2840                             my= s->mv[1][0][1];
2841                         }else{
2842                             mx= s->mv[0][0][0];
2843                             my= s->mv[0][0][1];
2844                         }
2845
2846                         s->mv_dir= best_s.mv_dir;
2847                         s->mv_type = best_s.mv_type;
2848                         s->mb_intra= 0;
2849 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2850                         s->mv[0][0][1] = best_s.mv[0][0][1];
2851                         s->mv[1][0][0] = best_s.mv[1][0][0];
2852                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2853                         backup_s.dquant= 0;
2854                         s->skipdct=1;
2855                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2856                                         &dmin, &next_block, mx, my);
2857                         s->skipdct=0;
2858                     }
2859                 }
2860
2861                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2862
2863                 copy_context_after_encode(s, &best_s, -1);
2864
2865                 pb_bits_count= put_bits_count(&s->pb);
2866                 flush_put_bits(&s->pb);
2867                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2868                 s->pb= backup_s.pb;
2869
2870                 if(s->data_partitioning){
2871                     pb2_bits_count= put_bits_count(&s->pb2);
2872                     flush_put_bits(&s->pb2);
2873                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2874                     s->pb2= backup_s.pb2;
2875
2876                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2877                     flush_put_bits(&s->tex_pb);
2878                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2879                     s->tex_pb= backup_s.tex_pb;
2880                 }
2881                 s->last_bits= put_bits_count(&s->pb);
2882
2883                 if (CONFIG_H263_ENCODER &&
2884                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2885                     ff_h263_update_motion_val(s);
2886
2887                 if(next_block==0){ //FIXME 16 vs linesize16
2888                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2889                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2890                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2891                 }
2892
2893                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2894                     ff_MPV_decode_mb(s, s->block);
2895             } else {
2896                 int motion_x = 0, motion_y = 0;
2897                 s->mv_type=MV_TYPE_16X16;
2898                 // only one MB-Type possible
2899
2900                 switch(mb_type){
2901                 case CANDIDATE_MB_TYPE_INTRA:
2902                     s->mv_dir = 0;
2903                     s->mb_intra= 1;
2904                     motion_x= s->mv[0][0][0] = 0;
2905                     motion_y= s->mv[0][0][1] = 0;
2906                     break;
2907                 case CANDIDATE_MB_TYPE_INTER:
2908                     s->mv_dir = MV_DIR_FORWARD;
2909                     s->mb_intra= 0;
2910                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2911                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2912                     break;
2913                 case CANDIDATE_MB_TYPE_INTER_I:
2914                     s->mv_dir = MV_DIR_FORWARD;
2915                     s->mv_type = MV_TYPE_FIELD;
2916                     s->mb_intra= 0;
2917                     for(i=0; i<2; i++){
2918                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2919                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2920                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2921                     }
2922                     break;
2923                 case CANDIDATE_MB_TYPE_INTER4V:
2924                     s->mv_dir = MV_DIR_FORWARD;
2925                     s->mv_type = MV_TYPE_8X8;
2926                     s->mb_intra= 0;
2927                     for(i=0; i<4; i++){
2928                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2929                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2930                     }
2931                     break;
2932                 case CANDIDATE_MB_TYPE_DIRECT:
2933                     if (CONFIG_MPEG4_ENCODER) {
2934                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2935                         s->mb_intra= 0;
2936                         motion_x=s->b_direct_mv_table[xy][0];
2937                         motion_y=s->b_direct_mv_table[xy][1];
2938                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2939                     }
2940                     break;
2941                 case CANDIDATE_MB_TYPE_DIRECT0:
2942                     if (CONFIG_MPEG4_ENCODER) {
2943                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2944                         s->mb_intra= 0;
2945                         ff_mpeg4_set_direct_mv(s, 0, 0);
2946                     }
2947                     break;
2948                 case CANDIDATE_MB_TYPE_BIDIR:
2949                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2950                     s->mb_intra= 0;
2951                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2952                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2953                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2954                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2955                     break;
2956                 case CANDIDATE_MB_TYPE_BACKWARD:
2957                     s->mv_dir = MV_DIR_BACKWARD;
2958                     s->mb_intra= 0;
2959                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2960                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2961                     break;
2962                 case CANDIDATE_MB_TYPE_FORWARD:
2963                     s->mv_dir = MV_DIR_FORWARD;
2964                     s->mb_intra= 0;
2965                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2966                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2967 //                    printf(" %d %d ", motion_x, motion_y);
2968                     break;
2969                 case CANDIDATE_MB_TYPE_FORWARD_I:
2970                     s->mv_dir = MV_DIR_FORWARD;
2971                     s->mv_type = MV_TYPE_FIELD;
2972                     s->mb_intra= 0;
2973                     for(i=0; i<2; i++){
2974                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2975                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2976                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2977                     }
2978                     break;
2979                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2980                     s->mv_dir = MV_DIR_BACKWARD;
2981                     s->mv_type = MV_TYPE_FIELD;
2982                     s->mb_intra= 0;
2983                     for(i=0; i<2; i++){
2984                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2985                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2986                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2987                     }
2988                     break;
2989                 case CANDIDATE_MB_TYPE_BIDIR_I:
2990                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2991                     s->mv_type = MV_TYPE_FIELD;
2992                     s->mb_intra= 0;
2993                     for(dir=0; dir<2; dir++){
2994                         for(i=0; i<2; i++){
2995                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2996                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2997                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2998                         }
2999                     }
3000                     break;
3001                 default:
3002                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3003                 }
3004
3005                 encode_mb(s, motion_x, motion_y);
3006
3007                 // RAL: Update last macroblock type
3008                 s->last_mv_dir = s->mv_dir;
3009
3010                 if (CONFIG_H263_ENCODER &&
3011                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3012                     ff_h263_update_motion_val(s);
3013
3014                 ff_MPV_decode_mb(s, s->block);
3015             }
3016
3017             /* clean the MV table in IPS frames for direct mode in B frames */
3018             if(s->mb_intra /* && I,P,S_TYPE */){
3019                 s->p_mv_table[xy][0]=0;
3020                 s->p_mv_table[xy][1]=0;
3021             }
3022
3023             if(s->flags&CODEC_FLAG_PSNR){
3024                 int w= 16;
3025                 int h= 16;
3026
3027                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3028                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3029
3030                 s->current_picture.f.error[0] += sse(
3031                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3032                     s->dest[0], w, h, s->linesize);
3033                 s->current_picture.f.error[1] += sse(
3034                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3035                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3036                 s->current_picture.f.error[2] += sse(
3037                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3038                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3039             }
3040             if(s->loop_filter){
3041                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3042                     ff_h263_loop_filter(s);
3043             }
3044 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
3045         }
3046     }
3047
3048     //not beautiful here but we must write it before flushing so it has to be here
3049     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3050         ff_msmpeg4_encode_ext_header(s);
3051
3052     write_slice_end(s);
3053
3054     /* Send the last GOB if RTP */
3055     if (s->avctx->rtp_callback) {
3056         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3057         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3058         /* Call the RTP callback to send the last GOB */
3059         emms_c();
3060         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3061     }
3062
3063     return 0;
3064 }
3065
/*
 * Fold one accumulator of a slice context into the main context:
 * dst->field += src->field, then src->field is cleared.
 * Relies on pointers named `dst` and `src` being in scope at the call site.
 * The do/while(0) wrapper keeps both statements together, so the macro
 * behaves as a single statement even as the body of an unbraced if/for.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3067 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3068     MERGE(me.scene_change_score);
3069     MERGE(me.mc_mb_var_sum_temp);
3070     MERGE(me.mb_var_sum_temp);
3071 }
3072
3073 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3074     int i;
3075
3076     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3077     MERGE(dct_count[1]);
3078     MERGE(mv_bits);
3079     MERGE(i_tex_bits);
3080     MERGE(p_tex_bits);
3081     MERGE(i_count);
3082     MERGE(f_count);
3083     MERGE(b_count);
3084     MERGE(skip_count);
3085     MERGE(misc_bits);
3086     MERGE(error_count);
3087     MERGE(padding_bug_score);
3088     MERGE(current_picture.f.error[0]);
3089     MERGE(current_picture.f.error[1]);
3090     MERGE(current_picture.f.error[2]);
3091
3092     if(dst->avctx->noise_reduction){
3093         for(i=0; i<64; i++){
3094             MERGE(dct_error_sum[0][i]);
3095             MERGE(dct_error_sum[1][i]);
3096         }
3097     }
3098
3099     assert(put_bits_count(&src->pb) % 8 ==0);
3100     assert(put_bits_count(&dst->pb) % 8 ==0);
3101     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3102     flush_put_bits(&dst->pb);
3103 }
3104
3105 static int estimate_qp(MpegEncContext *s, int dry_run){
3106     if (s->next_lambda){
3107         s->current_picture_ptr->f.quality =
3108         s->current_picture.f.quality = s->next_lambda;
3109         if(!dry_run) s->next_lambda= 0;
3110     } else if (!s->fixed_qscale) {
3111         s->current_picture_ptr->f.quality =
3112         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3113         if (s->current_picture.f.quality < 0)
3114             return -1;
3115     }
3116
3117     if(s->adaptive_quant){
3118         switch(s->codec_id){
3119         case CODEC_ID_MPEG4:
3120             if (CONFIG_MPEG4_ENCODER)
3121                 ff_clean_mpeg4_qscales(s);
3122             break;
3123         case CODEC_ID_H263:
3124         case CODEC_ID_H263P:
3125         case CODEC_ID_FLV1:
3126             if (CONFIG_H263_ENCODER)
3127                 ff_clean_h263_qscales(s);
3128             break;
3129         default:
3130             ff_init_qscale_tab(s);
3131         }
3132
3133         s->lambda= s->lambda_table[0];
3134         //FIXME broken
3135     }else
3136         s->lambda = s->current_picture.f.quality;
3137 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3138     update_qscale(s);
3139     return 0;
3140 }
3141
3142 /* must be called before writing the header */
3143 static void set_frame_distances(MpegEncContext * s){
3144     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3145     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3146
3147     if(s->pict_type==AV_PICTURE_TYPE_B){
3148         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3149         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3150     }else{
3151         s->pp_time= s->time - s->last_non_b_time;
3152         s->last_non_b_time= s->time;
3153         assert(s->picture_number==0 || s->pp_time > 0);
3154     }
3155 }
3156
/**
 * Encode one picture.
 *
 * Steps, in order: initialise timing and rounding state, seed lambda,
 * run motion estimation on all slice contexts (or the spatial variance
 * pass for I pictures), possibly turn a P picture into an I picture on a
 * scene change, choose f_code/b_code and fix over-long vectors, pick the
 * final quantiser, write the format specific picture header and finally
 * run encode_thread on all slice contexts and merge their results into s.
 *
 * @param s               encoder context
 * @param picture_number  stored in s->picture_number and handed to the
 *                        picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails
 */
3157 static int encode_picture(MpegEncContext *s, int picture_number)
3158 {
3159     int i;
3160     int bits;
3161     int context_count = s->slice_context_count;
3162
3163     s->picture_number = picture_number;
3164
3165     /* Reset the average MB variance */
3166     s->me.mb_var_sum_temp    =
3167     s->me.mc_mb_var_sum_temp = 0;
3168
3169     /* we need to initialize some time vars before we can encode b-frames */
3170     // RAL: Condition added for MPEG1VIDEO
3171     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3172         set_frame_distances(s);
3173     if(CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4)
3174         ff_set_mpeg4_time(s);
3175
3176     s->me.scene_change_score=0;
3177
3178 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3179
         /* I pictures reset no_rounding (1 for msmpeg4_version >= 3, else 0);
          * other non-B pictures toggle it when flipflop rounding is enabled
          * or the codec is H263P / MPEG4 */
3180     if(s->pict_type==AV_PICTURE_TYPE_I){
3181         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3182         else                        s->no_rounding=0;
3183     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3184         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
3185             s->no_rounding ^= 1;
3186     }
3187
         /* second pass: dry-run quantiser estimate before motion estimation;
          * otherwise, unless CODEC_FLAG_QSCALE is set, seed lambda from the
          * value remembered for this picture type (B) or for the last
          * non-B picture type */
3188     if(s->flags & CODEC_FLAG_PASS2){
3189         if (estimate_qp(s,1) < 0)
3190             return -1;
3191         ff_get_2pass_fcode(s);
3192     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3193         if(s->pict_type==AV_PICTURE_TYPE_B)
3194             s->lambda= s->last_lambda_for[s->pict_type];
3195         else
3196             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3197         update_qscale(s);
3198     }
3199
         /* every codec except AMV uses the luma intra quantiser tables for
          * chroma too: free chroma tables that are distinct from the luma
          * ones, then alias them to the luma tables */
3200     if(s->codec_id != CODEC_ID_AMV){
3201         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3202         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3203         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3204         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3205     }
3206
3207     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* bring the additional slice contexts (index 1 and up) in sync with s */
3208     for(i=1; i<context_count; i++){
3209         ff_update_duplicate_context(s->thread_context[i], s);
3210     }
3211
3212     if(ff_init_me(s)<0)
3213         return -1;
3214
3215     /* Estimate motion for every MB */
3216     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3217         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3218         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3219         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3220             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3221                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3222             }
3223         }
3224
3225         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3226     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3227         /* I-Frame */
3228         for(i=0; i<s->mb_stride*s->mb_height; i++)
3229             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3230
3231         if(!s->fixed_qscale){
3232             /* finding spatial complexity for I-frame rate control */
3233             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3234         }
3235     }
         /* sum the per-slice ME statistics into s and publish the variance
          * sums on both current_picture and current_picture_ptr */
3236     for(i=1; i<context_count; i++){
3237         merge_context_after_me(s, s->thread_context[i]);
3238     }
3239     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3240     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3241     emms_c();
3242
         /* scene change: a P picture whose score exceeds
          * avctx->scenechange_threshold is coded as an I picture instead */
3243     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3244         s->pict_type= AV_PICTURE_TYPE_I;
3245         for(i=0; i<s->mb_stride*s->mb_height; i++)
3246             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3247 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3248         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3249     }
3250
         /* f_code / b_code selection and fixing of over-long vectors; the
          * whole step is skipped when umvplus is set */
3251     if(!s->umvplus){
3252         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3253             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3254
3255             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3256                 int a,b;
3257                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3258                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3259                 s->f_code= FFMAX3(s->f_code, a, b);
3260             }
3261
3262             ff_fix_long_p_mvs(s);
3263             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3264             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3265                 int j;
3266                 for(i=0; i<2; i++){
3267                     for(j=0; j<2; j++)
3268                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3269                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3270                 }
3271             }
3272         }
3273
3274         if(s->pict_type==AV_PICTURE_TYPE_B){
3275             int a, b;
3276
                 /* f_code covers the forward tables, b_code the backward ones;
                  * each is the larger of the plain and bidirectional result */
3277             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3278             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3279             s->f_code = FFMAX(a, b);
3280
3281             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3282             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3283             s->b_code = FFMAX(a, b);
3284
3285             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3286             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3287             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3288             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3289             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3290                 int dir, j;
3291                 for(dir=0; dir<2; dir++){
3292                     for(i=0; i<2; i++){
3293                         for(j=0; j<2; j++){
3294                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3295                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3296                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3297                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3298                         }
3299                     }
3300                 }
3301             }
3302         }
3303     }
3304
         /* final (non dry-run) quantiser decision for this picture */
3305     if (estimate_qp(s, 0) < 0)
3306         return -1;
3307
3308     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3309         s->qscale= 3; //reduce clipping problems
3310
3311     if (s->out_format == FMT_MJPEG) {
3312         /* for mjpeg, we do include qscale in the matrix */
3313         for(i=1;i<64;i++){
3314             int j= s->dsp.idct_permutation[i];
3315
3316             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3317         }
3318         s->y_dc_scale_table=
3319         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3320         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3321         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3322                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrix, so run with a fixed 8 */
3323         s->qscale= 8;
3324     }
3325     if(s->codec_id == CODEC_ID_AMV){
             /* AMV: constant DC scale tables (13 luma, 14 chroma) and the
              * matrices taken from sp5x_quant_table entries 5*2+0 / 5*2+1 */
3326         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3327         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3328         for(i=1;i<64;i++){
3329             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3330
3331             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3332             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3333         }
3334         s->y_dc_scale_table= y;
3335         s->c_dc_scale_table= c;
3336         s->intra_matrix[0] = 13;
3337         s->chroma_intra_matrix[0] = 14;
3338         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3339                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3340         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3341                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3342         s->qscale= 8;
3343     }
3344
3345     //FIXME var duplication
3346     s->current_picture_ptr->f.key_frame =
3347     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3348     s->current_picture_ptr->f.pict_type =
3349     s->current_picture.f.pict_type = s->pict_type;
3350
3351     if (s->current_picture.f.key_frame)
3352         s->picture_in_gop_number=0;
3353
         /* write the picture header; its size in bits ends up in s->header_bits */
3354     s->last_bits= put_bits_count(&s->pb);
3355     switch(s->out_format) {
3356     case FMT_MJPEG:
3357         if (CONFIG_MJPEG_ENCODER)
3358             ff_mjpeg_encode_picture_header(s);
3359         break;
3360     case FMT_H261:
3361         if (CONFIG_H261_ENCODER)
3362             ff_h261_encode_picture_header(s, picture_number);
3363         break;
3364     case FMT_H263:
3365         if (CONFIG_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
3366             ff_wmv2_encode_picture_header(s, picture_number);
3367         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3368             ff_msmpeg4_encode_picture_header(s, picture_number);
3369         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3370             ff_mpeg4_encode_picture_header(s, picture_number);
3371         else if (CONFIG_RV10_ENCODER && s->codec_id == CODEC_ID_RV10)
3372             ff_rv10_encode_picture_header(s, picture_number);
3373         else if (CONFIG_RV20_ENCODER && s->codec_id == CODEC_ID_RV20)
3374             ff_rv20_encode_picture_header(s, picture_number);
3375         else if (CONFIG_FLV_ENCODER && s->codec_id == CODEC_ID_FLV1)
3376             ff_flv_encode_picture_header(s, picture_number);
3377         else if (CONFIG_H263_ENCODER)
3378             ff_h263_encode_picture_header(s, picture_number);
3379         break;
3380     case FMT_MPEG1:
3381         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3382             ff_mpeg1_encode_picture_header(s, picture_number);
3383         break;
3384     case FMT_H264:
3385         break;
3386     default:
3387         av_assert0(0);
3388     }
3389     bits= put_bits_count(&s->pb);
3390     s->header_bits= bits - s->last_bits;
3391
         /* refresh the additional slice contexts from s, encode all slices
          * through avctx->execute, then merge the slice results back into s */
3392     for(i=1; i<context_count; i++){
3393         update_duplicate_context_after_me(s->thread_context[i], s);
3394     }
3395     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3396     for(i=1; i<context_count; i++){
3397         merge_context_after_encode(s, s->thread_context[i]);
3398     }
3399     emms_c();
3400     return 0;
3401 }
3402
3403 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3404     const int intra= s->mb_intra;
3405     int i;
3406
3407     s->dct_count[intra]++;
3408
3409     for(i=0; i<64; i++){
3410         int level= block[i];
3411
3412         if(level){
3413             if(level>0){
3414                 s->dct_error_sum[intra][i] += level;
3415                 level -= s->dct_offset[intra][i];
3416                 if(level<0) level=0;
3417             }else{
3418                 s->dct_error_sum[intra][i] -= level;
3419                 level += s->dct_offset[intra][i];
3420                 if(level>0) level=0;
3421             }
3422             block[i]= level;
3423         }
3424     }
3425 }
3426
/**
 * Forward DCT followed by rate-distortion optimised (trellis) quantisation
 * of one 8x8 block.
 *
 * For each scan position up to the last coefficient that survives plain
 * quantisation, at most two candidate levels are tried and the run/level
 * chain with the lowest "distortion + lambda * bits" score is kept.
 *
 * @param s         encoder context; s->coded_score[n] receives the score of
 *                  the chosen solution
 * @param block     64 input samples; transformed in place and overwritten
 *                  with the selected quantised levels
 * @param n         block index; n < 4 selects the luma DC scale and luma
 *                  intra matrix, other values the chroma ones
 * @param qscale    quantiser scale
 * @param overflow  set to non-zero when the OR of the quantised magnitudes
 *                  is larger than s->max_qcoeff
 * @return scan index of the last non-zero coefficient kept; a value below
 *         the first optimised position (0 for intra, -1 for inter) when no
 *         coefficient is kept
 */
3427 static int dct_quantize_trellis_c(MpegEncContext *s,
3428                                   DCTELEM *block, int n,
3429                                   int qscale, int *overflow){
3430     const int *qmat;
3431     const uint8_t *scantable= s->intra_scantable.scantable;
3432     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3433     int max=0;
3434     unsigned int threshold1, threshold2;
3435     int bias=0;
3436     int run_tab[65];
3437     int level_tab[65];
3438     int score_tab[65];
3439     int survivor[65];
3440     int survivor_count;
3441     int last_run=0;
3442     int last_level=0;
3443     int last_score= 0;
3444     int last_i;
3445     int coeff[2][64];
3446     int coeff_count[64];
3447     int qmul, qadd, start_i, last_non_zero, i, dc;
3448     const int esc_length= s->ac_esc_length;
3449     uint8_t * length;
3450     uint8_t * last_length;
3451     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3452
3453     s->dsp.fdct (block);
3454
         /* noise reduction is applied only when the error statistics exist */
3455     if(s->dct_error_sum)
3456         s->denoise_dct(s, block);
3457     qmul= qscale*16;
3458     qadd= ((qscale-1)|1)*8;
3459
3460     if (s->mb_intra) {
3461         int q;
3462         if (!s->h263_aic) {
3463             if (n < 4)
3464                 q = s->y_dc_scale;
3465             else
3466                 q = s->c_dc_scale;
3467             q = q << 3;
3468         } else{
3469             /* For AIC we skip quant/dequant of INTRADC */
3470             q = 1 << 3;
3471             qadd=0;
3472         }
3473
3474         /* note: block[0] is assumed to be positive */
3475         block[0] = (block[0] + (q >> 1)) / q;
             /* the DC coefficient is quantised above; optimisation starts at position 1 */
3476         start_i = 1;
3477         last_non_zero = 0;
3478         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3479         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3480             bias= 1<<(QMAT_SHIFT-1);
3481         length     = s->intra_ac_vlc_length;
3482         last_length= s->intra_ac_vlc_last_length;
3483     } else {
3484         start_i = 0;
3485         last_non_zero = -1;
3486         qmat = s->q_inter_matrix[qscale];
3487         length     = s->inter_ac_vlc_length;
3488         last_length= s->inter_ac_vlc_last_length;
3489     }
3490     last_i= start_i;
3491
         /* a product block*qmat inside [-threshold1, threshold1] quantises to
          * zero; the unsigned compare used below tests "outside that range"
          * with a single comparison */
3492     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3493     threshold2= (threshold1<<1);
3494
         /* find the last scan position that does not quantise to zero */
3495     for(i=63; i>=start_i; i--) {
3496         const int j = scantable[i];
3497         int level = block[j] * qmat[j];
3498
3499         if(((unsigned)(level+threshold1))>threshold2){
3500             last_non_zero = i;
3501             break;
3502         }
3503     }
3504
         /* candidate levels per position: coeff[0] is the plainly quantised
          * level, coeff[1] the same level one step closer to zero (used only
          * when the magnitude is at least 2); a position that quantises to
          * zero gets a single candidate of +1 or -1 */
3505     for(i=start_i; i<=last_non_zero; i++) {
3506         const int j = scantable[i];
3507         int level = block[j] * qmat[j];
3508
3509 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3510 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3511         if(((unsigned)(level+threshold1))>threshold2){
3512             if(level>0){
3513                 level= (bias + level)>>QMAT_SHIFT;
3514                 coeff[0][i]= level;
3515                 coeff[1][i]= level-1;
3516 //                coeff[2][k]= level-2;
3517             }else{
3518                 level= (bias - level)>>QMAT_SHIFT;
3519                 coeff[0][i]= -level;
3520                 coeff[1][i]= -level+1;
3521 //                coeff[2][k]= -level+2;
3522             }
3523             coeff_count[i]= FFMIN(level, 2);
3524             av_assert2(coeff_count[i]);
3525             max |=level;
3526         }else{
3527             coeff[0][i]= (level>>31)|1;
3528             coeff_count[i]= 1;
3529         }
3530     }
3531
3532     *overflow= s->max_qcoeff < max; //overflow might have happened
3533
         /* nothing survives quantisation: clear the optimised part and leave */
3534     if(last_non_zero < start_i){
3535         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3536         return last_non_zero;
3537     }
3538
         /* score_tab[k] holds the best score of a chain whose last coded
          * coefficient sits at scan position k-1; survivor[] lists the
          * positions k still considered as the start of the next run */
3539     score_tab[start_i]= 0;
3540     survivor[0]= start_i;
3541     survivor_count= 1;
3542
3543     for(i=start_i; i<=last_non_zero; i++){
3544         int level_index, j, zero_distortion;
3545         int dct_coeff= FFABS(block[ scantable[i] ]);
3546         int best_score=256*256*256*120;
3547
3548         if (s->dsp.fdct == ff_fdct_ifast)
3549             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3550         zero_distortion= dct_coeff*dct_coeff;
3551
3552         for(level_index=0; level_index < coeff_count[i]; level_index++){
3553             int distortion;
3554             int level= coeff[level_index][i];
3555             const int alevel= FFABS(level);
3556             int unquant_coeff;
3557
3558             av_assert2(level);
3559
                 /* reconstruct the value this candidate level would give */
3560             if(s->out_format == FMT_H263){
3561                 unquant_coeff= alevel*qmul + qadd;
3562             }else{ //MPEG1
3563                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3564                 if(s->mb_intra){
3565                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3566                         unquant_coeff =   (unquant_coeff - 1) | 1;
3567                 }else{
3568                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3569                         unquant_coeff =   (unquant_coeff - 1) | 1;
3570                 }
3571                 unquant_coeff<<= 3;
3572             }
3573
                 /* distortion is measured relative to coding the coefficient as zero */
3574             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3575             level+=64;
                 /* levels in -64..63 use the VLC length tables, all others
                  * are charged the escape length */
3576             if((level&(~127)) == 0){
3577                 for(j=survivor_count-1; j>=0; j--){
3578                     int run= i - survivor[j];
3579                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3580                     score += score_tab[i-run];
3581
3582                     if(score < best_score){
3583                         best_score= score;
3584                         run_tab[i+1]= run;
3585                         level_tab[i+1]= level-64;
3586                     }
3587                 }
3588
                     /* for FMT_H263 output the last coefficient is costed with
                      * last_length[], so the best end point is tracked here */
3589                 if(s->out_format == FMT_H263){
3590                     for(j=survivor_count-1; j>=0; j--){
3591                         int run= i - survivor[j];
3592                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3593                         score += score_tab[i-run];
3594                         if(score < last_score){
3595                             last_score= score;
3596                             last_run= run;
3597                             last_level= level-64;
3598                             last_i= i+1;
3599                         }
3600                     }
3601                 }
3602             }else{
3603                 distortion += esc_length*lambda;
3604                 for(j=survivor_count-1; j>=0; j--){
3605                     int run= i - survivor[j];
3606                     int score= distortion + score_tab[i-run];
3607
3608                     if(score < best_score){
3609                         best_score= score;
3610                         run_tab[i+1]= run;
3611                         level_tab[i+1]= level-64;
3612                     }
3613                 }
3614
3615                 if(s->out_format == FMT_H263){
3616                   for(j=survivor_count-1; j>=0; j--){
3617                         int run= i - survivor[j];
3618                         int score= distortion + score_tab[i-run];
3619                         if(score < last_score){
3620                             last_score= score;
3621                             last_run= run;
3622                             last_level= level-64;
3623                             last_i= i+1;
3624                         }
3625                     }
3626                 }
3627             }
3628         }
3629
3630         score_tab[i+1]= best_score;
3631
3632         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
             /* drop survivors whose score is worse than the new best score
              * (with a slack of lambda when last_non_zero > 27) */
3633         if(last_non_zero <= 27){
3634             for(; survivor_count; survivor_count--){
3635                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3636                     break;
3637             }
3638         }else{
3639             for(; survivor_count; survivor_count--){
3640                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3641                     break;
3642             }
3643         }
3644
3645         survivor[ survivor_count++ ]= i+1;
3646     }
3647
         /* formats other than FMT_H263: choose the end position afterwards;
          * every end position except 0 is charged an extra 2*lambda */
3648     if(s->out_format != FMT_H263){
3649         last_score= 256*256*256*120;
3650         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3651             int score= score_tab[i];
3652             if(i) score += lambda*2; //FIXME exacter?
3653
3654             if(score < last_score){
3655                 last_score= score;
3656                 last_i= i;
3657                 last_level= level_tab[i];
3658                 last_run= run_tab[i];
3659             }
3660         }
3661     }
3662
3663     s->coded_score[n] = last_score;
3664
3665     dc= FFABS(block[0]);
3666     last_non_zero= last_i - 1;
3667     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3668
3669     if(last_non_zero < start_i)
3670         return last_non_zero;
3671
         /* inter block where only the first coefficient is left: decide again
          * between the candidate levels and dropping the block entirely
          * (best_level 0, which returns -1) */
3672     if(last_non_zero == 0 && start_i == 0){
3673         int best_level= 0;
3674         int best_score= dc * dc;
3675
3676         for(i=0; i<coeff_count[0]; i++){
3677             int level= coeff[i][0];
3678             int alevel= FFABS(level);
3679             int unquant_coeff, score, distortion;
3680
3681             if(s->out_format == FMT_H263){
3682                     unquant_coeff= (alevel*qmul + qadd)>>3;
3683             }else{ //MPEG1
3684                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3685                     unquant_coeff =   (unquant_coeff - 1) | 1;
3686             }
3687             unquant_coeff = (unquant_coeff + 4) >> 3;
3688             unquant_coeff<<= 3 + 3;
3689
3690             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3691             level+=64;
3692             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3693             else                    score= distortion + esc_length*lambda;
3694
3695             if(score < best_score){
3696                 best_score= score;
3697                 best_level= level - 64;
3698             }
3699         }
3700         block[0]= best_level;
3701         s->coded_score[n] = best_score - dc*dc;
3702         if(best_level == 0) return -1;
3703         else                return last_non_zero;
3704     }
3705
         /* walk back from the chosen end through run_tab[] / level_tab[] and
          * store the selected levels at their permuted scan positions */
3706     i= last_i;
3707     av_assert2(last_level);
3708
3709     block[ perm_scantable[last_non_zero] ]= last_level;
3710     i -= last_run + 1;
3711
3712     for(; i>start_i; i -= run_tab[i] + 1){
3713         block[ perm_scantable[i-1] ]= level_tab[i];
3714     }
3715
3716     return last_non_zero;
3717 }
3718
3719 //#define REFINE_STATS 1
3720 static int16_t basis[64][64];
3721
3722 static void build_basis(uint8_t *perm){
3723     int i, j, x, y;
3724     emms_c();
3725     for(i=0; i<8; i++){
3726         for(j=0; j<8; j++){
3727             for(y=0; y<8; y++){
3728                 for(x=0; x<8; x++){
3729                     double s= 0.25*(1<<BASIS_SHIFT);
3730                     int index= 8*i + j;
3731                     int perm_index= perm[index];
3732                     if(i==0) s*= sqrt(0.5);
3733                     if(j==0) s*= sqrt(0.5);
3734                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3735                 }
3736             }
3737         }
3738     }
3739 }
3740
3741 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3742                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3743                         int n, int qscale){
3744     int16_t rem[64];
3745     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3746     const uint8_t *scantable= s->intra_scantable.scantable;
3747     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3748 //    unsigned int threshold1, threshold2;
3749 //    int bias=0;
3750     int run_tab[65];
3751     int prev_run=0;
3752     int prev_level=0;
3753     int qmul, qadd, start_i, last_non_zero, i, dc;
3754     uint8_t * length;
3755     uint8_t * last_length;
3756     int lambda;
3757     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3758 #ifdef REFINE_STATS
3759 static int count=0;
3760 static int after_last=0;
3761 static int to_zero=0;
3762 static int from_zero=0;
3763 static int raise=0;
3764 static int lower=0;
3765 static int messed_sign=0;
3766 #endif
3767
3768     if(basis[0][0] == 0)
3769         build_basis(s->dsp.idct_permutation);
3770
3771     qmul= qscale*2;
3772     qadd= (qscale-1)|1;
3773     if (s->mb_intra) {
3774         if (!s->h263_aic) {
3775             if (n < 4)
3776                 q = s->y_dc_scale;
3777             else
3778                 q = s->c_dc_scale;
3779         } else{
3780             /* For AIC we skip quant/dequant of INTRADC */
3781             q = 1;
3782             qadd=0;
3783         }
3784         q <<= RECON_SHIFT-3;
3785         /* note: block[0] is assumed to be positive */
3786         dc= block[0]*q;
3787 //        block[0] = (block[0] + (q >> 1)) / q;
3788         start_i = 1;
3789 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3790 //            bias= 1<<(QMAT_SHIFT-1);
3791         length     = s->intra_ac_vlc_length;
3792         last_length= s->intra_ac_vlc_last_length;
3793     } else {
3794         dc= 0;
3795         start_i = 0;
3796         length     = s->inter_ac_vlc_length;
3797         last_length= s->inter_ac_vlc_last_length;
3798     }
3799     last_non_zero = s->block_last_index[n];
3800
3801 #ifdef REFINE_STATS
3802 {START_TIMER
3803 #endif
3804     dc += (1<<(RECON_SHIFT-1));
3805     for(i=0; i<64; i++){
3806         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3807     }
3808 #ifdef REFINE_STATS
3809 STOP_TIMER("memset rem[]")}
3810 #endif
3811     sum=0;
3812     for(i=0; i<64; i++){
3813         int one= 36;
3814         int qns=4;
3815         int w;
3816
3817         w= FFABS(weight[i]) + qns*one;
3818         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3819
3820         weight[i] = w;
3821 //        w=weight[i] = (63*qns + (w/2)) / w;
3822
3823         av_assert2(w>0);
3824         av_assert2(w<(1<<6));
3825         sum += w*w;
3826     }
3827     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3828 #ifdef REFINE_STATS
3829 {START_TIMER
3830 #endif
3831     run=0;
3832     rle_index=0;
3833     for(i=start_i; i<=last_non_zero; i++){
3834         int j= perm_scantable[i];
3835         const int level= block[j];
3836         int coeff;
3837
3838         if(level){
3839             if(level<0) coeff= qmul*level - qadd;
3840             else        coeff= qmul*level + qadd;
3841             run_tab[rle_index++]=run;
3842             run=0;
3843
3844             s->dsp.add_8x8basis(rem, basis[j], coeff);
3845         }else{
3846             run++;
3847         }
3848     }
3849 #ifdef REFINE_STATS
3850 if(last_non_zero>0){
3851 STOP_TIMER("init rem[]")
3852 }
3853 }
3854
3855 {START_TIMER
3856 #endif
3857     for(;;){
3858         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3859         int best_coeff=0;
3860         int best_change=0;
3861         int run2, best_unquant_change=0, analyze_gradient;
3862 #ifdef REFINE_STATS
3863 {START_TIMER
3864 #endif
3865         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3866
3867         if(analyze_gradient){
3868 #ifdef REFINE_STATS
3869 {START_TIMER
3870 #endif
3871             for(i=0; i<64; i++){
3872                 int w= weight[i];
3873
3874                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3875             }
3876 #ifdef REFINE_STATS
3877 STOP_TIMER("rem*w*w")}
3878 {START_TIMER
3879 #endif
3880             s->dsp.fdct(d1);
3881 #ifdef REFINE_STATS
3882 STOP_TIMER("dct")}
3883 #endif
3884         }
3885
3886         if(start_i){
3887             const int level= block[0];
3888             int change, old_coeff;
3889
3890             av_assert2(s->mb_intra);
3891
3892             old_coeff= q*level;
3893
3894             for(change=-1; change<=1; change+=2){
3895                 int new_level= level + change;
3896                 int score, new_coeff;
3897
3898                 new_coeff= q*new_level;
3899                 if(new_coeff >= 2048 || new_coeff < 0)
3900                     continue;
3901
3902                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3903                 if(score<best_score){
3904                     best_score= score;
3905                     best_coeff= 0;
3906                     best_change= change;
3907                     best_unquant_change= new_coeff - old_coeff;
3908                 }
3909             }
3910         }
3911
3912         run=0;
3913         rle_index=0;
3914         run2= run_tab[rle_index++];
3915         prev_level=0;
3916         prev_run=0;
3917
3918         for(i=start_i; i<64; i++){
3919             int j= perm_scantable[i];
3920             const int level= block[j];
3921             int change, old_coeff;
3922
3923             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3924                 break;
3925
3926             if(level){
3927                 if(level<0) old_coeff= qmul*level - qadd;
3928                 else        old_coeff= qmul*level + qadd;
3929                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3930             }else{
3931                 old_coeff=0;
3932                 run2--;
3933                 av_assert2(run2>=0 || i >= last_non_zero );
3934             }
3935
3936             for(change=-1; change<=1; change+=2){
3937                 int new_level= level + change;
3938                 int score, new_coeff, unquant_change;
3939
3940                 score=0;
3941                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3942                    continue;
3943
3944                 if(new_level){
3945                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3946                     else            new_coeff= qmul*new_level + qadd;
3947                     if(new_coeff >= 2048 || new_coeff <= -2048)
3948                         continue;
3949                     //FIXME check for overflow
3950
3951                     if(level){
3952                         if(level < 63 && level > -63){
3953                             if(i < last_non_zero)
3954                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3955                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3956                             else
3957                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3958                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3959                         }
3960                     }else{
3961                         av_assert2(FFABS(new_level)==1);
3962
3963                         if(analyze_gradient){
3964                             int g= d1[ scantable[i] ];
3965                             if(g && (g^new_level) >= 0)
3966                                 continue;
3967                         }
3968
3969                         if(i < last_non_zero){
3970                             int next_i= i + run2 + 1;
3971                             int next_level= block[ perm_scantable[next_i] ] + 64;
3972
3973                             if(next_level&(~127))
3974                                 next_level= 0;
3975
3976                             if(next_i < last_non_zero)
3977                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3978                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3979                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3980                             else
3981                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3982                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3983                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3984                         }else{
3985                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3986                             if(prev_level){
3987                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3988                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3989                             }
3990                         }
3991                     }
3992                 }else{
3993                     new_coeff=0;
3994                     av_assert2(FFABS(level)==1);
3995
3996                     if(i < last_non_zero){
3997                         int next_i= i + run2 + 1;
3998                         int next_level= block[ perm_scantable[next_i] ] + 64;
3999
4000                         if(next_level&(~127))
4001                             next_level= 0;
4002
4003                         if(next_i < last_non_zero)
4004                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4005                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4006                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4007                         else
4008                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4009                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4010                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4011                     }else{
4012                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4013                         if(prev_level){
4014                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4015                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4016                         }
4017                     }
4018                 }
4019
4020                 score *= lambda;
4021
4022                 unquant_change= new_coeff - old_coeff;
4023                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4024
4025                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4026                 if(score<best_score){
4027                     best_score= score;
4028                     best_coeff= i;
4029                     best_change= change;
4030                     best_unquant_change= unquant_change;
4031                 }
4032             }
4033             if(level){
4034                 prev_level= level + 64;
4035                 if(prev_level&(~127))
4036                     prev_level= 0;
4037                 prev_run= run;
4038                 run=0;
4039             }else{
4040                 run++;
4041             }
4042         }
4043 #ifdef REFINE_STATS
4044 STOP_TIMER("iterative step")}
4045 #endif
4046
4047         if(best_change){
4048             int j= perm_scantable[ best_coeff ];
4049
4050             block[j] += best_change;
4051
4052             if(best_coeff > last_non_zero){
4053                 last_non_zero= best_coeff;
4054                 av_assert2(block[j]);
4055 #ifdef REFINE_STATS
4056 after_last++;
4057 #endif
4058             }else{
4059 #ifdef REFINE_STATS
4060 if(block[j]){
4061     if(block[j] - best_change){
4062         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4063             raise++;
4064         }else{
4065             lower++;
4066         }
4067     }else{
4068         from_zero++;
4069     }
4070 }else{
4071     to_zero++;
4072 }
4073 #endif
4074                 for(; last_non_zero>=start_i; last_non_zero--){
4075                     if(block[perm_scantable[last_non_zero]])
4076                         break;
4077                 }
4078             }
4079 #ifdef REFINE_STATS
4080 count++;
4081 if(256*256*256*64 % count == 0){
4082     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4083 }
4084 #endif
4085             run=0;
4086             rle_index=0;
4087             for(i=start_i; i<=last_non_zero; i++){
4088                 int j= perm_scantable[i];
4089                 const int level= block[j];
4090
4091                  if(level){
4092                      run_tab[rle_index++]=run;
4093                      run=0;
4094                  }else{
4095                      run++;
4096                  }
4097             }
4098
4099             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4100         }else{
4101             break;
4102         }
4103     }
4104 #ifdef REFINE_STATS
4105 if(last_non_zero>0){
4106 STOP_TIMER("iterative search")
4107 }
4108 }
4109 #endif
4110
4111     return last_non_zero;
4112 }
4113
4114 int ff_dct_quantize_c(MpegEncContext *s,
4115                         DCTELEM *block, int n,
4116                         int qscale, int *overflow)
4117 {
4118     int i, j, level, last_non_zero, q, start_i;
4119     const int *qmat;
4120     const uint8_t *scantable= s->intra_scantable.scantable;
4121     int bias;
4122     int max=0;
4123     unsigned int threshold1, threshold2;
4124
4125     s->dsp.fdct (block);
4126
4127     if(s->dct_error_sum)
4128         s->denoise_dct(s, block);
4129
4130     if (s->mb_intra) {
4131         if (!s->h263_aic) {
4132             if (n < 4)
4133                 q = s->y_dc_scale;
4134             else
4135                 q = s->c_dc_scale;
4136             q = q << 3;
4137         } else
4138             /* For AIC we skip quant/dequant of INTRADC */
4139             q = 1 << 3;
4140
4141         /* note: block[0] is assumed to be positive */
4142         block[0] = (block[0] + (q >> 1)) / q;
4143         start_i = 1;
4144         last_non_zero = 0;
4145         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4146         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4147     } else {
4148         start_i = 0;
4149         last_non_zero = -1;
4150         qmat = s->q_inter_matrix[qscale];
4151         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4152     }
4153     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4154     threshold2= (threshold1<<1);
4155     for(i=63;i>=start_i;i--) {
4156         j = scantable[i];
4157         level = block[j] * qmat[j];
4158
4159         if(((unsigned)(level+threshold1))>threshold2){
4160             last_non_zero = i;
4161             break;
4162         }else{
4163             block[j]=0;
4164         }
4165     }
4166     for(i=start_i; i<=last_non_zero; i++) {
4167         j = scantable[i];
4168         level = block[j] * qmat[j];
4169
4170 //        if(   bias+level >= (1<<QMAT_SHIFT)
4171 //           || bias-level >= (1<<QMAT_SHIFT)){
4172         if(((unsigned)(level+threshold1))>threshold2){
4173             if(level>0){
4174                 level= (bias + level)>>QMAT_SHIFT;
4175                 block[j]= level;
4176             }else{
4177                 level= (bias - level)>>QMAT_SHIFT;
4178                 block[j]= -level;
4179             }
4180             max |=level;
4181         }else{
4182             block[j]=0;
4183         }
4184     }
4185     *overflow= s->max_qcoeff < max; //overflow might have happened
4186
4187     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4188     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4189         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4190
4191     return last_non_zero;
4192 }
4193
/* Byte offset of a MpegEncContext member, for the AVOption tables below. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags used by the tables below. The expansion is parenthesized so
 * it stays a single operand when used next to tighter-binding operators. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4196 static const AVOption h263_options[] = {
4197     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4198     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4199     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { 0 }, 0, INT_MAX, VE },
4200     FF_MPV_COMMON_OPTS
4201     { NULL },
4202 };
4203
4204 static const AVClass h263_class = {
4205     .class_name = "H.263 encoder",
4206     .item_name  = av_default_item_name,
4207     .option     = h263_options,
4208     .version    = LIBAVUTIL_VERSION_INT,
4209 };
4210
4211 AVCodec ff_h263_encoder = {
4212     .name           = "h263",
4213     .type           = AVMEDIA_TYPE_VIDEO,
4214     .id             = CODEC_ID_H263,
4215     .priv_data_size = sizeof(MpegEncContext),
4216     .init           = ff_MPV_encode_init,
4217     .encode2        = ff_MPV_encode_picture,
4218     .close          = ff_MPV_encode_end,
4219     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4220     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4221     .priv_class     = &h263_class,
4222 };
4223
4224 static const AVOption h263p_options[] = {
4225     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4226     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4227     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4228     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4229     FF_MPV_COMMON_OPTS
4230     { NULL },
4231 };
4232 static const AVClass h263p_class = {
4233     .class_name = "H.263p encoder",
4234     .item_name  = av_default_item_name,
4235     .option     = h263p_options,
4236     .version    = LIBAVUTIL_VERSION_INT,
4237 };
4238
4239 AVCodec ff_h263p_encoder = {
4240     .name           = "h263p",
4241     .type           = AVMEDIA_TYPE_VIDEO,
4242     .id             = CODEC_ID_H263P,
4243     .priv_data_size = sizeof(MpegEncContext),
4244     .init           = ff_MPV_encode_init,
4245     .encode2        = ff_MPV_encode_picture,
4246     .close          = ff_MPV_encode_end,
4247     .capabilities   = CODEC_CAP_SLICE_THREADS,
4248     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4249     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4250     .priv_class     = &h263p_class,
4251 };
4252
4253 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4254
4255 AVCodec ff_msmpeg4v2_encoder = {
4256     .name           = "msmpeg4v2",
4257     .type           = AVMEDIA_TYPE_VIDEO,
4258     .id             = CODEC_ID_MSMPEG4V2,
4259     .priv_data_size = sizeof(MpegEncContext),
4260     .init           = ff_MPV_encode_init,
4261     .encode2        = ff_MPV_encode_picture,
4262     .close          = ff_MPV_encode_end,
4263     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4264     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4265     .priv_class     = &msmpeg4v2_class,
4266 };
4267
4268 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4269
4270 AVCodec ff_msmpeg4v3_encoder = {
4271     .name           = "msmpeg4",
4272     .type           = AVMEDIA_TYPE_VIDEO,
4273     .id             = CODEC_ID_MSMPEG4V3,
4274     .priv_data_size = sizeof(MpegEncContext),
4275     .init           = ff_MPV_encode_init,
4276     .encode2        = ff_MPV_encode_picture,
4277     .close          = ff_MPV_encode_end,
4278     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4279     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4280     .priv_class     = &msmpeg4v3_class,
4281 };
4282
4283 FF_MPV_GENERIC_CLASS(wmv1)
4284
4285 AVCodec ff_wmv1_encoder = {
4286     .name           = "wmv1",
4287     .type           = AVMEDIA_TYPE_VIDEO,
4288     .id             = CODEC_ID_WMV1,
4289     .priv_data_size = sizeof(MpegEncContext),
4290     .init           = ff_MPV_encode_init,
4291     .encode2        = ff_MPV_encode_picture,
4292     .close          = ff_MPV_encode_end,
4293     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4294     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4295     .priv_class     = &wmv1_class,
4296 };