]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
movtextdec: fix return value for too small packets.
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "mpegvideo_common.h"
37 #include "h263.h"
38 #include "mjpegenc.h"
39 #include "msmpeg4.h"
40 #include "faandct.h"
41 #include "thread.h"
42 #include "aandcttab.h"
43 #include "flv.h"
44 #include "mpeg4video.h"
45 #include "internal.h"
46 #include "bytestream.h"
47 #include <limits.h>
48 #include "sp5x.h"
49
50 //#undef NDEBUG
51 //#include <assert.h>
52
/* Prototypes for file-local (static) helpers so they can be referenced
 * before their definitions; presumably all are defined further down in
 * this file. */
53 static int encode_picture(MpegEncContext *s, int picture_number);
54 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
55 static int sse_mb(MpegEncContext *s);
56 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
57 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
58
59 /* enable all paranoid tests for rounding, overflows, etc... */
60 //#define PARANOID
61
62 //#define DEBUG
63
/* Shared default tables installed by MPV_encode_defaults():
 * default_mv_penalty becomes s->me.mv_penalty (left zero-initialised here),
 * default_fcode_tab becomes s->fcode_tab, with the entries for motion
 * vector offsets -16..15 (biased by MAX_MV) set to 1. */
64 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
65 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
66
/* Option table containing only the entries expanded from
 * FF_MPV_COMMON_OPTS, followed by the { NULL } terminator entry. */
67 const AVOption ff_mpv_generic_options[] = {
68     FF_MPV_COMMON_OPTS
69     { NULL },
70 };
71
72 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
73                        uint16_t (*qmat16)[2][64],
74                        const uint16_t *quant_matrix,
75                        int bias, int qmin, int qmax, int intra)
76 {
77     int qscale;
78     int shift = 0;
79
80     for (qscale = qmin; qscale <= qmax; qscale++) {
81         int i;
82         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
83             dsp->fdct == ff_jpeg_fdct_islow_10 ||
84             dsp->fdct == ff_faandct) {
85             for (i = 0; i < 64; i++) {
86                 const int j = dsp->idct_permutation[i];
87                 /* 16 <= qscale * quant_matrix[i] <= 7905
88                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
89                  *             19952 <=              x  <= 249205026
90                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
91                  *           3444240 >= (1 << 36) / (x) >= 275 */
92
93                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
94                                         (qscale * quant_matrix[j]));
95             }
96         } else if (dsp->fdct == ff_fdct_ifast) {
97             for (i = 0; i < 64; i++) {
98                 const int j = dsp->idct_permutation[i];
99                 /* 16 <= qscale * quant_matrix[i] <= 7905
100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
101                  *             19952 <=              x  <= 249205026
102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
103                  *           3444240 >= (1 << 36) / (x) >= 275 */
104
105                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
106                                         (ff_aanscales[i] * qscale * quant_matrix[j]));
107             }
108         } else {
109             for (i = 0; i < 64; i++) {
110                 const int j = dsp->idct_permutation[i];
111                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
112                  * Assume x = qscale * quant_matrix[i]
113                  * So             16 <=              x  <= 7905
114                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
115                  * so          32768 >= (1 << 19) / (x) >= 67 */
116                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
117                                         (qscale * quant_matrix[j]));
118                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
119                 //                    (qscale * quant_matrix[i]);
120                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
121                                        (qscale * quant_matrix[j]);
122
123                 if (qmat16[qscale][0][i] == 0 ||
124                     qmat16[qscale][0][i] == 128 * 256)
125                     qmat16[qscale][0][i] = 128 * 256 - 1;
126                 qmat16[qscale][1][i] =
127                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
128                                 qmat16[qscale][0][i]);
129             }
130         }
131
132         for (i = intra; i < 64; i++) {
133             int64_t max = 8191;
134             if (dsp->fdct == ff_fdct_ifast) {
135                 max = (8191LL * ff_aanscales[i]) >> 14;
136             }
137             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
138                 shift++;
139             }
140         }
141     }
142     if (shift) {
143         av_log(NULL, AV_LOG_INFO,
144                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
145                QMAT_SHIFT - shift);
146     }
147 }
148
149 static inline void update_qscale(MpegEncContext *s)
150 {
151     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
152                 (FF_LAMBDA_SHIFT + 7);
153     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
154
155     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
156                  FF_LAMBDA_SHIFT;
157 }
158
159 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
160 {
161     int i;
162
163     if (matrix) {
164         put_bits(pb, 1, 1);
165         for (i = 0; i < 64; i++) {
166             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
167         }
168     } else
169         put_bits(pb, 1, 0);
170 }
171
172 /**
173  * init s->current_picture.qscale_table from s->lambda_table
174  */
175 void ff_init_qscale_tab(MpegEncContext *s)
176 {
177     int8_t * const qscale_table = s->current_picture.f.qscale_table;
178     int i;
179
180     for (i = 0; i < s->mb_num; i++) {
181         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
182         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
183         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
184                                                   s->avctx->qmax);
185     }
186 }
187
188 static void copy_picture_attributes(MpegEncContext *s,
189                                     AVFrame *dst,
190                                     AVFrame *src)
191 {
192     int i;
193
194     dst->pict_type              = src->pict_type;
195     dst->quality                = src->quality;
196     dst->coded_picture_number   = src->coded_picture_number;
197     dst->display_picture_number = src->display_picture_number;
198     //dst->reference              = src->reference;
199     dst->pts                    = src->pts;
200     dst->interlaced_frame       = src->interlaced_frame;
201     dst->top_field_first        = src->top_field_first;
202
203     if (s->avctx->me_threshold) {
204         if (!src->motion_val[0])
205             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
206         if (!src->mb_type)
207             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
208         if (!src->ref_index[0])
209             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
210         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
211             av_log(s->avctx, AV_LOG_ERROR,
212                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
213                    src->motion_subsample_log2, dst->motion_subsample_log2);
214
215         memcpy(dst->mb_type, src->mb_type,
216                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
217
218         for (i = 0; i < 2; i++) {
219             int stride = ((16 * s->mb_width ) >>
220                           src->motion_subsample_log2) + 1;
221             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
222
223             if (src->motion_val[i] &&
224                 src->motion_val[i] != dst->motion_val[i]) {
225                 memcpy(dst->motion_val[i], src->motion_val[i],
226                        2 * stride * height * sizeof(int16_t));
227             }
228             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
229                 memcpy(dst->ref_index[i], src->ref_index[i],
230                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
231             }
232         }
233     }
234 }
235
236 static void update_duplicate_context_after_me(MpegEncContext *dst,
237                                               MpegEncContext *src)
238 {
239 #define COPY(a) dst->a= src->a
240     COPY(pict_type);
241     COPY(current_picture);
242     COPY(f_code);
243     COPY(b_code);
244     COPY(qscale);
245     COPY(lambda);
246     COPY(lambda2);
247     COPY(picture_in_gop_number);
248     COPY(gop_picture_number);
249     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
250     COPY(progressive_frame);    // FIXME don't set in encode_header
251     COPY(partitioned_frame);    // FIXME don't set in encode_header
252 #undef COPY
253 }
254
255 /**
256  * Set the given MpegEncContext to defaults for encoding.
257  * the changed fields will not depend upon the prior state of the MpegEncContext.
258  */
259 static void MPV_encode_defaults(MpegEncContext *s)
260 {
261     int i;
262     ff_MPV_common_defaults(s);
263
264     for (i = -16; i < 16; i++) {
265         default_fcode_tab[i + MAX_MV] = 1;
266     }
267     s->me.mv_penalty = default_mv_penalty;
268     s->fcode_tab     = default_fcode_tab;
269 }
270
271 /* init video encoder */
272 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
273 {
274     MpegEncContext *s = avctx->priv_data;
275     int i;
276     int chroma_h_shift, chroma_v_shift;
277
278     MPV_encode_defaults(s);
279
280     switch (avctx->codec_id) {
281     case CODEC_ID_MPEG2VIDEO:
282         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
283             avctx->pix_fmt != PIX_FMT_YUV422P) {
284             av_log(avctx, AV_LOG_ERROR,
285                    "only YUV420 and YUV422 are supported\n");
286             return -1;
287         }
288         break;
289     case CODEC_ID_LJPEG:
290         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
291             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
292             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
293             avctx->pix_fmt != PIX_FMT_BGR0     &&
294             avctx->pix_fmt != PIX_FMT_BGRA     &&
295             avctx->pix_fmt != PIX_FMT_BGR24    &&
296             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
297               avctx->pix_fmt != PIX_FMT_YUV422P &&
298               avctx->pix_fmt != PIX_FMT_YUV444P) ||
299              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
300             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
301             return -1;
302         }
303         break;
304     case CODEC_ID_MJPEG:
305     case CODEC_ID_AMV:
306         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
307             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
308             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
309               avctx->pix_fmt != PIX_FMT_YUV422P) ||
310              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
311             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
312             return -1;
313         }
314         break;
315     default:
316         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
317             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
318             return -1;
319         }
320     }
321
322     switch (avctx->pix_fmt) {
323     case PIX_FMT_YUVJ422P:
324     case PIX_FMT_YUV422P:
325         s->chroma_format = CHROMA_422;
326         break;
327     case PIX_FMT_YUVJ420P:
328     case PIX_FMT_YUV420P:
329     default:
330         s->chroma_format = CHROMA_420;
331         break;
332     }
333
334     s->bit_rate = avctx->bit_rate;
335     s->width    = avctx->width;
336     s->height   = avctx->height;
337     if (avctx->gop_size > 600 &&
338         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
339         av_log(avctx, AV_LOG_WARNING,
340                "keyframe interval too large!, reducing it from %d to %d\n",
341                avctx->gop_size, 600);
342         avctx->gop_size = 600;
343     }
344     s->gop_size     = avctx->gop_size;
345     s->avctx        = avctx;
346     s->flags        = avctx->flags;
347     s->flags2       = avctx->flags2;
348     s->max_b_frames = avctx->max_b_frames;
349     s->codec_id     = avctx->codec->id;
350 #if FF_API_MPV_GLOBAL_OPTS
351     if (avctx->luma_elim_threshold)
352         s->luma_elim_threshold   = avctx->luma_elim_threshold;
353     if (avctx->chroma_elim_threshold)
354         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
355 #endif
356     s->strict_std_compliance = avctx->strict_std_compliance;
357     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
358     s->mpeg_quant         = avctx->mpeg_quant;
359     s->rtp_mode           = !!avctx->rtp_payload_size;
360     s->intra_dc_precision = avctx->intra_dc_precision;
361     s->user_specified_pts = AV_NOPTS_VALUE;
362
363     if (s->gop_size <= 1) {
364         s->intra_only = 1;
365         s->gop_size   = 12;
366     } else {
367         s->intra_only = 0;
368     }
369
370     s->me_method = avctx->me_method;
371
372     /* Fixed QSCALE */
373     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
374
375 #if FF_API_MPV_GLOBAL_OPTS
376     if (s->flags & CODEC_FLAG_QP_RD)
377         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
378 #endif
379
380     s->adaptive_quant = (s->avctx->lumi_masking ||
381                          s->avctx->dark_masking ||
382                          s->avctx->temporal_cplx_masking ||
383                          s->avctx->spatial_cplx_masking  ||
384                          s->avctx->p_masking      ||
385                          s->avctx->border_masking ||
386                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
387                         !s->fixed_qscale;
388
389     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
390
391     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
392         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
393         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
394             return -1;
395     }
396
397     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
398         av_log(avctx, AV_LOG_INFO,
399                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
400     }
401
402     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
403         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
404         return -1;
405     }
406
407     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
408         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
409         return -1;
410     }
411
412     if (avctx->rc_max_rate &&
413         avctx->rc_max_rate == avctx->bit_rate &&
414         avctx->rc_max_rate != avctx->rc_min_rate) {
415         av_log(avctx, AV_LOG_INFO,
416                "impossible bitrate constraints, this will fail\n");
417     }
418
419     if (avctx->rc_buffer_size &&
420         avctx->bit_rate * (int64_t)avctx->time_base.num >
421             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
422         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
423         return -1;
424     }
425
426     if (!s->fixed_qscale &&
427         avctx->bit_rate * av_q2d(avctx->time_base) >
428             avctx->bit_rate_tolerance) {
429         av_log(avctx, AV_LOG_ERROR,
430                "bitrate tolerance too small for bitrate\n");
431         return -1;
432     }
433
434     if (s->avctx->rc_max_rate &&
435         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
436         (s->codec_id == CODEC_ID_MPEG1VIDEO ||
437          s->codec_id == CODEC_ID_MPEG2VIDEO) &&
438         90000LL * (avctx->rc_buffer_size - 1) >
439             s->avctx->rc_max_rate * 0xFFFFLL) {
440         av_log(avctx, AV_LOG_INFO,
441                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
442                "specified vbv buffer is too large for the given bitrate!\n");
443     }
444
445     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != CODEC_ID_MPEG4 &&
446         s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P &&
447         s->codec_id != CODEC_ID_FLV1) {
448         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
449         return -1;
450     }
451
452     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
453         av_log(avctx, AV_LOG_ERROR,
454                "OBMC is only supported with simple mb decision\n");
455         return -1;
456     }
457
458     if (s->quarter_sample && s->codec_id != CODEC_ID_MPEG4) {
459         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
460         return -1;
461     }
462
463     if (s->max_b_frames                    &&
464         s->codec_id != CODEC_ID_MPEG4      &&
465         s->codec_id != CODEC_ID_MPEG1VIDEO &&
466         s->codec_id != CODEC_ID_MPEG2VIDEO) {
467         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
468         return -1;
469     }
470
471     if ((s->codec_id == CODEC_ID_MPEG4 ||
472          s->codec_id == CODEC_ID_H263  ||
473          s->codec_id == CODEC_ID_H263P) &&
474         (avctx->sample_aspect_ratio.num > 255 ||
475          avctx->sample_aspect_ratio.den > 255)) {
476         av_log(avctx, AV_LOG_WARNING,
477                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
478                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
479         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
480                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
481     }
482
483     if ((s->codec_id == CODEC_ID_H263  ||
484          s->codec_id == CODEC_ID_H263P) &&
485         (avctx->width  > 2048 ||
486          avctx->height > 1152 )) {
487         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
488         return -1;
489     }
490     if ((s->codec_id == CODEC_ID_H263  ||
491          s->codec_id == CODEC_ID_H263P) &&
492         ((avctx->width &3) ||
493          (avctx->height&3) )) {
494         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
495         return -1;
496     }
497
498     if (s->codec_id == CODEC_ID_MPEG1VIDEO &&
499         (avctx->width  > 4095 ||
500          avctx->height > 4095 )) {
501         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
502         return -1;
503     }
504
505     if (s->codec_id == CODEC_ID_MPEG2VIDEO &&
506         (avctx->width  > 16383 ||
507          avctx->height > 16383 )) {
508         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
509         return -1;
510     }
511
512     if ((s->codec_id == CODEC_ID_WMV1 ||
513          s->codec_id == CODEC_ID_WMV2) &&
514          avctx->width & 1) {
515          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
516          return -1;
517     }
518
519     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
520         s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO) {
521         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
522         return -1;
523     }
524
525     // FIXME mpeg2 uses that too
526     if (s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4) {
527         av_log(avctx, AV_LOG_ERROR,
528                "mpeg2 style quantization not supported by codec\n");
529         return -1;
530     }
531
532 #if FF_API_MPV_GLOBAL_OPTS
533     if (s->flags & CODEC_FLAG_CBP_RD)
534         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
535 #endif
536
537     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
538         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
539         return -1;
540     }
541
542     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
543         s->avctx->mb_decision != FF_MB_DECISION_RD) {
544         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
545         return -1;
546     }
547
548     if (s->avctx->scenechange_threshold < 1000000000 &&
549         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
550         av_log(avctx, AV_LOG_ERROR,
551                "closed gop with scene change detection are not supported yet, "
552                "set threshold to 1000000000\n");
553         return -1;
554     }
555
556     if (s->flags & CODEC_FLAG_LOW_DELAY) {
557         if (s->codec_id != CODEC_ID_MPEG2VIDEO) {
558             av_log(avctx, AV_LOG_ERROR,
559                   "low delay forcing is only available for mpeg2\n");
560             return -1;
561         }
562         if (s->max_b_frames != 0) {
563             av_log(avctx, AV_LOG_ERROR,
564                    "b frames cannot be used with low delay\n");
565             return -1;
566         }
567     }
568
569     if (s->q_scale_type == 1) {
570         if (avctx->qmax > 12) {
571             av_log(avctx, AV_LOG_ERROR,
572                    "non linear quant only supports qmax <= 12 currently\n");
573             return -1;
574         }
575     }
576
577     if (s->avctx->thread_count > 1         &&
578         s->codec_id != CODEC_ID_MPEG4      &&
579         s->codec_id != CODEC_ID_MPEG1VIDEO &&
580         s->codec_id != CODEC_ID_MPEG2VIDEO &&
581         s->codec_id != CODEC_ID_MJPEG      &&
582         (s->codec_id != CODEC_ID_H263P)) {
583         av_log(avctx, AV_LOG_ERROR,
584                "multi threaded encoding not supported by codec\n");
585         return -1;
586     }
587
588     if (s->avctx->thread_count < 1) {
589         av_log(avctx, AV_LOG_ERROR,
590                "automatic thread number detection not supported by codec, "
591                "patch welcome\n");
592         return -1;
593     }
594
595     if (s->avctx->thread_count > 1)
596         s->rtp_mode = 1;
597
598     if (!avctx->time_base.den || !avctx->time_base.num) {
599         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
600         return -1;
601     }
602
603     i = (INT_MAX / 2 + 128) >> 8;
604     if (avctx->me_threshold >= i) {
605         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
606                i - 1);
607         return -1;
608     }
609     if (avctx->mb_threshold >= i) {
610         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
611                i - 1);
612         return -1;
613     }
614
615     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
616         av_log(avctx, AV_LOG_INFO,
617                "notice: b_frame_strategy only affects the first pass\n");
618         avctx->b_frame_strategy = 0;
619     }
620
621     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
622     if (i > 1) {
623         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
624         avctx->time_base.den /= i;
625         avctx->time_base.num /= i;
626         //return -1;
627     }
628
629     if (s->mpeg_quant || s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || s->codec_id == CODEC_ID_MJPEG || s->codec_id==CODEC_ID_AMV) {
630         // (a + x * 3 / 8) / x
631         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
632         s->inter_quant_bias = 0;
633     } else {
634         s->intra_quant_bias = 0;
635         // (a - x / 4) / x
636         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
637     }
638
639     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
640         s->intra_quant_bias = avctx->intra_quant_bias;
641     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
642         s->inter_quant_bias = avctx->inter_quant_bias;
643
644     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
645
646     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
647                                   &chroma_v_shift);
648
649     if (avctx->codec_id == CODEC_ID_MPEG4 &&
650         s->avctx->time_base.den > (1 << 16) - 1) {
651         av_log(avctx, AV_LOG_ERROR,
652                "timebase %d/%d not supported by MPEG 4 standard, "
653                "the maximum admitted value for the timebase denominator "
654                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
655                (1 << 16) - 1);
656         return -1;
657     }
658     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
659
660 #if FF_API_MPV_GLOBAL_OPTS
661     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
662         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
663     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
664         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
665     if (avctx->quantizer_noise_shaping)
666         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
667 #endif
668
669     switch (avctx->codec->id) {
670     case CODEC_ID_MPEG1VIDEO:
671         s->out_format = FMT_MPEG1;
672         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
673         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
674         break;
675     case CODEC_ID_MPEG2VIDEO:
676         s->out_format = FMT_MPEG1;
677         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
678         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
679         s->rtp_mode   = 1;
680         break;
681     case CODEC_ID_LJPEG:
682     case CODEC_ID_MJPEG:
683     case CODEC_ID_AMV:
684         s->out_format = FMT_MJPEG;
685         s->intra_only = 1; /* force intra only for jpeg */
686         if (avctx->codec->id == CODEC_ID_LJPEG &&
687             (avctx->pix_fmt == PIX_FMT_BGR0
688              || s->avctx->pix_fmt == PIX_FMT_BGRA
689              || s->avctx->pix_fmt == PIX_FMT_BGR24)) {
690             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
691             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
692             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
693         } else {
694             s->mjpeg_vsample[0] = 2;
695             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
696             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
697             s->mjpeg_hsample[0] = 2;
698             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
699             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
700         }
701         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
702             ff_mjpeg_encode_init(s) < 0)
703             return -1;
704         avctx->delay = 0;
705         s->low_delay = 1;
706         break;
707     case CODEC_ID_H261:
708         if (!CONFIG_H261_ENCODER)
709             return -1;
710         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
711             av_log(avctx, AV_LOG_ERROR,
712                    "The specified picture size of %dx%d is not valid for the "
713                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
714                     s->width, s->height);
715             return -1;
716         }
717         s->out_format = FMT_H261;
718         avctx->delay  = 0;
719         s->low_delay  = 1;
720         break;
721     case CODEC_ID_H263:
722         if (!CONFIG_H263_ENCODER)
723             return -1;
724         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
725                              s->width, s->height) == 8) {
726             av_log(avctx, AV_LOG_ERROR,
727                    "The specified picture size of %dx%d is not valid for "
728                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
729                    "352x288, 704x576, and 1408x1152. "
730                    "Try H.263+.\n", s->width, s->height);
731             return -1;
732         }
733         s->out_format = FMT_H263;
734         avctx->delay  = 0;
735         s->low_delay  = 1;
736         break;
737     case CODEC_ID_H263P:
738         s->out_format = FMT_H263;
739         s->h263_plus  = 1;
740         /* Fx */
741         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
742         s->modified_quant  = s->h263_aic;
743         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
744         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
745
746         /* /Fx */
747         /* These are just to be sure */
748         avctx->delay = 0;
749         s->low_delay = 1;
750         break;
751     case CODEC_ID_FLV1:
752         s->out_format      = FMT_H263;
753         s->h263_flv        = 2; /* format = 1; 11-bit codes */
754         s->unrestricted_mv = 1;
755         s->rtp_mode  = 0; /* don't allow GOB */
756         avctx->delay = 0;
757         s->low_delay = 1;
758         break;
759     case CODEC_ID_RV10:
760         s->out_format = FMT_H263;
761         avctx->delay  = 0;
762         s->low_delay  = 1;
763         break;
764     case CODEC_ID_RV20:
765         s->out_format      = FMT_H263;
766         avctx->delay       = 0;
767         s->low_delay       = 1;
768         s->modified_quant  = 1;
769         s->h263_aic        = 1;
770         s->h263_plus       = 1;
771         s->loop_filter     = 1;
772         s->unrestricted_mv = 0;
773         break;
774     case CODEC_ID_MPEG4:
775         s->out_format      = FMT_H263;
776         s->h263_pred       = 1;
777         s->unrestricted_mv = 1;
778         s->low_delay       = s->max_b_frames ? 0 : 1;
779         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
780         break;
781     case CODEC_ID_MSMPEG4V2:
782         s->out_format      = FMT_H263;
783         s->h263_pred       = 1;
784         s->unrestricted_mv = 1;
785         s->msmpeg4_version = 2;
786         avctx->delay       = 0;
787         s->low_delay       = 1;
788         break;
789     case CODEC_ID_MSMPEG4V3:
790         s->out_format        = FMT_H263;
791         s->h263_pred         = 1;
792         s->unrestricted_mv   = 1;
793         s->msmpeg4_version   = 3;
794         s->flipflop_rounding = 1;
795         avctx->delay         = 0;
796         s->low_delay         = 1;
797         break;
798     case CODEC_ID_WMV1:
799         s->out_format        = FMT_H263;
800         s->h263_pred         = 1;
801         s->unrestricted_mv   = 1;
802         s->msmpeg4_version   = 4;
803         s->flipflop_rounding = 1;
804         avctx->delay         = 0;
805         s->low_delay         = 1;
806         break;
807     case CODEC_ID_WMV2:
808         s->out_format        = FMT_H263;
809         s->h263_pred         = 1;
810         s->unrestricted_mv   = 1;
811         s->msmpeg4_version   = 5;
812         s->flipflop_rounding = 1;
813         avctx->delay         = 0;
814         s->low_delay         = 1;
815         break;
816     default:
817         return -1;
818     }
819
820     avctx->has_b_frames = !s->low_delay;
821
822     s->encoding = 1;
823
824     s->progressive_frame    =
825     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
826                                                 CODEC_FLAG_INTERLACED_ME) ||
827                                 s->alternate_scan);
828
829     /* init */
830     if (ff_MPV_common_init(s) < 0)
831         return -1;
832
833     if (!s->dct_quantize)
834         s->dct_quantize = ff_dct_quantize_c;
835     if (!s->denoise_dct)
836         s->denoise_dct  = denoise_dct_c;
837     s->fast_dct_quantize = s->dct_quantize;
838     if (avctx->trellis)
839         s->dct_quantize  = dct_quantize_trellis_c;
840
841     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
842         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
843
844     s->quant_precision = 5;
845
846     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
847     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
848
849     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
850         ff_h261_encode_init(s);
851     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
852         ff_h263_encode_init(s);
853     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
854         ff_msmpeg4_encode_init(s);
855     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
856         && s->out_format == FMT_MPEG1)
857         ff_mpeg1_encode_init(s);
858
859     /* init q matrix */
860     for (i = 0; i < 64; i++) {
861         int j = s->dsp.idct_permutation[i];
862         if (CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4 &&
863             s->mpeg_quant) {
864             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
865             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
866         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
867             s->intra_matrix[j] =
868             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
869         } else {
870             /* mpeg1/2 */
871             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
872             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
873         }
874         if (s->avctx->intra_matrix)
875             s->intra_matrix[j] = s->avctx->intra_matrix[i];
876         if (s->avctx->inter_matrix)
877             s->inter_matrix[j] = s->avctx->inter_matrix[i];
878     }
879
880     /* precompute matrix */
881     /* for mjpeg, we do include qscale in the matrix */
882     if (s->out_format != FMT_MJPEG) {
883         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
884                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
885                           31, 1);
886         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
887                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
888                           31, 0);
889     }
890
891     if (ff_rate_control_init(s) < 0)
892         return -1;
893
894     return 0;
895 }
896
897 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
898 {
899     MpegEncContext *s = avctx->priv_data;
900
901     ff_rate_control_uninit(s);
902
903     ff_MPV_common_end(s);
904     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
905         s->out_format == FMT_MJPEG)
906         ff_mjpeg_encode_close(s);
907
908     av_freep(&avctx->extradata);
909
910     return 0;
911 }
912
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance, in samples, between the starts of two rows
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
926
927 static int get_intra_count(MpegEncContext *s, uint8_t *src,
928                            uint8_t *ref, int stride)
929 {
930     int x, y, w, h;
931     int acc = 0;
932
933     w = s->width  & ~15;
934     h = s->height & ~15;
935
936     for (y = 0; y < h; y += 16) {
937         for (x = 0; x < w; x += 16) {
938             int offset = x + y * stride;
939             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
940                                      16);
941             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
942             int sae  = get_sae(src + offset, mean, stride);
943
944             acc += sae + 500 < sad;
945         }
946     }
947     return acc;
948 }
949
950
951 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
952 {
953     AVFrame *pic = NULL;
954     int64_t pts;
955     int i;
956     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
957                                                  (s->low_delay ? 0 : 1);
958     int direct = 1;
959
960     if (pic_arg) {
961         pts = pic_arg->pts;
962         pic_arg->display_picture_number = s->input_picture_number++;
963
964         if (pts != AV_NOPTS_VALUE) {
965             if (s->user_specified_pts != AV_NOPTS_VALUE) {
966                 int64_t time = pts;
967                 int64_t last = s->user_specified_pts;
968
969                 if (time <= last) {
970                     av_log(s->avctx, AV_LOG_ERROR,
971                            "Error, Invalid timestamp=%"PRId64", "
972                            "last=%"PRId64"\n", pts, s->user_specified_pts);
973                     return -1;
974                 }
975
976                 if (!s->low_delay && pic_arg->display_picture_number == 1)
977                     s->dts_delta = time - last;
978             }
979             s->user_specified_pts = pts;
980         } else {
981             if (s->user_specified_pts != AV_NOPTS_VALUE) {
982                 s->user_specified_pts =
983                 pts = s->user_specified_pts + 1;
984                 av_log(s->avctx, AV_LOG_INFO,
985                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
986                        pts);
987             } else {
988                 pts = pic_arg->display_picture_number;
989             }
990         }
991     }
992
993   if (pic_arg) {
994     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
995         direct = 0;
996     if (pic_arg->linesize[0] != s->linesize)
997         direct = 0;
998     if (pic_arg->linesize[1] != s->uvlinesize)
999         direct = 0;
1000     if (pic_arg->linesize[2] != s->uvlinesize)
1001         direct = 0;
1002
1003     //av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0],
1004     //       pic_arg->linesize[1], s->linesize, s->uvlinesize);
1005
1006     if (direct) {
1007         i = ff_find_unused_picture(s, 1);
1008         if (i < 0)
1009             return i;
1010
1011         pic = &s->picture[i].f;
1012         pic->reference = 3;
1013
1014         for (i = 0; i < 4; i++) {
1015             pic->data[i]     = pic_arg->data[i];
1016             pic->linesize[i] = pic_arg->linesize[i];
1017         }
1018         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
1019             return -1;
1020         }
1021     } else {
1022         i = ff_find_unused_picture(s, 0);
1023         if (i < 0)
1024             return i;
1025
1026         pic = &s->picture[i].f;
1027         pic->reference = 3;
1028
1029         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
1030             return -1;
1031         }
1032
1033         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1034             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1035             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1036             // empty
1037         } else {
1038             int h_chroma_shift, v_chroma_shift;
1039             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
1040                                           &v_chroma_shift);
1041
1042             for (i = 0; i < 3; i++) {
1043                 int src_stride = pic_arg->linesize[i];
1044                 int dst_stride = i ? s->uvlinesize : s->linesize;
1045                 int h_shift = i ? h_chroma_shift : 0;
1046                 int v_shift = i ? v_chroma_shift : 0;
1047                 int w = s->width  >> h_shift;
1048                 int h = s->height >> v_shift;
1049                 uint8_t *src = pic_arg->data[i];
1050                 uint8_t *dst = pic->data[i];
1051
1052                 if(s->codec_id == CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)){
1053                     h= ((s->height+15)/16*16)>>v_shift;
1054                 }
1055
1056                 if (!s->avctx->rc_buffer_size)
1057                     dst += INPLACE_OFFSET;
1058
1059                 if (src_stride == dst_stride)
1060                     memcpy(dst, src, src_stride * h);
1061                 else {
1062                     while (h--) {
1063                         memcpy(dst, src, w);
1064                         dst += dst_stride;
1065                         src += src_stride;
1066                     }
1067                 }
1068             }
1069         }
1070     }
1071     copy_picture_attributes(s, pic, pic_arg);
1072     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1073   }
1074
1075     /* shift buffer entries */
1076     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1077         s->input_picture[i - 1] = s->input_picture[i];
1078
1079     s->input_picture[encoding_delay] = (Picture*) pic;
1080
1081     return 0;
1082 }
1083
1084 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1085 {
1086     int x, y, plane;
1087     int score = 0;
1088     int64_t score64 = 0;
1089
1090     for (plane = 0; plane < 3; plane++) {
1091         const int stride = p->f.linesize[plane];
1092         const int bw = plane ? 1 : 2;
1093         for (y = 0; y < s->mb_height * bw; y++) {
1094             for (x = 0; x < s->mb_width * bw; x++) {
1095                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1096                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1097                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1098                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1099
1100                 switch (s->avctx->frame_skip_exp) {
1101                 case 0: score    =  FFMAX(score, v);          break;
1102                 case 1: score   += FFABS(v);                  break;
1103                 case 2: score   += v * v;                     break;
1104                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1105                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1106                 }
1107             }
1108         }
1109     }
1110
1111     if (score)
1112         score64 = score;
1113
1114     if (score64 < s->avctx->frame_skip_threshold)
1115         return 1;
1116     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1117         return 1;
1118     return 0;
1119 }
1120
1121 static int estimate_best_b_count(MpegEncContext *s)
1122 {
1123     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1124     AVCodecContext *c = avcodec_alloc_context3(NULL);
1125     AVFrame input[FF_MAX_B_FRAMES + 2];
1126     const int scale = s->avctx->brd_scale;
1127     int i, j, out_size, p_lambda, b_lambda, lambda2;
1128     int outbuf_size  = s->width * s->height; // FIXME
1129     uint8_t *outbuf  = av_malloc(outbuf_size);
1130     int64_t best_rd  = INT64_MAX;
1131     int best_b_count = -1;
1132
1133     assert(scale >= 0 && scale <= 3);
1134
1135     //emms_c();
1136     //s->next_picture_ptr->quality;
1137     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1138     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1139     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1140     if (!b_lambda) // FIXME we should do this somewhere else
1141         b_lambda = p_lambda;
1142     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1143                FF_LAMBDA_SHIFT;
1144
1145     c->width        = s->width  >> scale;
1146     c->height       = s->height >> scale;
1147     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1148                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1149     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1150     c->mb_decision  = s->avctx->mb_decision;
1151     c->me_cmp       = s->avctx->me_cmp;
1152     c->mb_cmp       = s->avctx->mb_cmp;
1153     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1154     c->pix_fmt      = PIX_FMT_YUV420P;
1155     c->time_base    = s->avctx->time_base;
1156     c->max_b_frames = s->max_b_frames;
1157
1158     if (avcodec_open2(c, codec, NULL) < 0)
1159         return -1;
1160
1161     for (i = 0; i < s->max_b_frames + 2; i++) {
1162         int ysize = c->width * c->height;
1163         int csize = (c->width / 2) * (c->height / 2);
1164         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1165                                                 s->next_picture_ptr;
1166
1167         avcodec_get_frame_defaults(&input[i]);
1168         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1169         input[i].data[1]     = input[i].data[0] + ysize;
1170         input[i].data[2]     = input[i].data[1] + csize;
1171         input[i].linesize[0] = c->width;
1172         input[i].linesize[1] =
1173         input[i].linesize[2] = c->width / 2;
1174
1175         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1176             pre_input = *pre_input_ptr;
1177
1178             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1179                 pre_input.f.data[0] += INPLACE_OFFSET;
1180                 pre_input.f.data[1] += INPLACE_OFFSET;
1181                 pre_input.f.data[2] += INPLACE_OFFSET;
1182             }
1183
1184             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1185                                  pre_input.f.data[0], pre_input.f.linesize[0],
1186                                  c->width,      c->height);
1187             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1188                                  pre_input.f.data[1], pre_input.f.linesize[1],
1189                                  c->width >> 1, c->height >> 1);
1190             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1191                                  pre_input.f.data[2], pre_input.f.linesize[2],
1192                                  c->width >> 1, c->height >> 1);
1193         }
1194     }
1195
1196     for (j = 0; j < s->max_b_frames + 1; j++) {
1197         int64_t rd = 0;
1198
1199         if (!s->input_picture[j])
1200             break;
1201
1202         c->error[0] = c->error[1] = c->error[2] = 0;
1203
1204         input[0].pict_type = AV_PICTURE_TYPE_I;
1205         input[0].quality   = 1 * FF_QP2LAMBDA;
1206         out_size           = avcodec_encode_video(c, outbuf,
1207                                                   outbuf_size, &input[0]);
1208         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1209
1210         for (i = 0; i < s->max_b_frames + 1; i++) {
1211             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1212
1213             input[i + 1].pict_type = is_p ?
1214                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1215             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1216             out_size = avcodec_encode_video(c, outbuf, outbuf_size,
1217                                             &input[i + 1]);
1218             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1219         }
1220
1221         /* get the delayed frames */
1222         while (out_size) {
1223             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
1224             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1225         }
1226
1227         rd += c->error[0] + c->error[1] + c->error[2];
1228
1229         if (rd < best_rd) {
1230             best_rd = rd;
1231             best_b_count = j;
1232         }
1233     }
1234
1235     av_freep(&outbuf);
1236     avcodec_close(c);
1237     av_freep(&c);
1238
1239     for (i = 0; i < s->max_b_frames + 2; i++) {
1240         av_freep(&input[i].data[0]);
1241     }
1242
1243     return best_b_count;
1244 }
1245
1246 static int select_input_picture(MpegEncContext *s)
1247 {
1248     int i;
1249
1250     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1251         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1252     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1253
1254     /* set next picture type & ordering */
1255     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1256         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1257             s->next_picture_ptr == NULL || s->intra_only) {
1258             s->reordered_input_picture[0] = s->input_picture[0];
1259             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1260             s->reordered_input_picture[0]->f.coded_picture_number =
1261                 s->coded_picture_number++;
1262         } else {
1263             int b_frames;
1264
1265             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1266                 if (s->picture_in_gop_number < s->gop_size &&
1267                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1268                     // FIXME check that te gop check above is +-1 correct
1269                     //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n",
1270                     //       s->input_picture[0]->f.data[0],
1271                     //       s->input_picture[0]->pts);
1272
1273                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1274                         for (i = 0; i < 4; i++)
1275                             s->input_picture[0]->f.data[i] = NULL;
1276                         s->input_picture[0]->f.type = 0;
1277                     } else {
1278                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1279                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1280
1281                         s->avctx->release_buffer(s->avctx,
1282                                                  &s->input_picture[0]->f);
1283                     }
1284
1285                     emms_c();
1286                     ff_vbv_update(s, 0);
1287
1288                     goto no_output_pic;
1289                 }
1290             }
1291
1292             if (s->flags & CODEC_FLAG_PASS2) {
1293                 for (i = 0; i < s->max_b_frames + 1; i++) {
1294                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1295
1296                     if (pict_num >= s->rc_context.num_entries)
1297                         break;
1298                     if (!s->input_picture[i]) {
1299                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1300                         break;
1301                     }
1302
1303                     s->input_picture[i]->f.pict_type =
1304                         s->rc_context.entry[pict_num].new_pict_type;
1305                 }
1306             }
1307
1308             if (s->avctx->b_frame_strategy == 0) {
1309                 b_frames = s->max_b_frames;
1310                 while (b_frames && !s->input_picture[b_frames])
1311                     b_frames--;
1312             } else if (s->avctx->b_frame_strategy == 1) {
1313                 for (i = 1; i < s->max_b_frames + 1; i++) {
1314                     if (s->input_picture[i] &&
1315                         s->input_picture[i]->b_frame_score == 0) {
1316                         s->input_picture[i]->b_frame_score =
1317                             get_intra_count(s,
1318                                             s->input_picture[i    ]->f.data[0],
1319                                             s->input_picture[i - 1]->f.data[0],
1320                                             s->linesize) + 1;
1321                     }
1322                 }
1323                 for (i = 0; i < s->max_b_frames + 1; i++) {
1324                     if (s->input_picture[i] == NULL ||
1325                         s->input_picture[i]->b_frame_score - 1 >
1326                             s->mb_num / s->avctx->b_sensitivity)
1327                         break;
1328                 }
1329
1330                 b_frames = FFMAX(0, i - 1);
1331
1332                 /* reset scores */
1333                 for (i = 0; i < b_frames + 1; i++) {
1334                     s->input_picture[i]->b_frame_score = 0;
1335                 }
1336             } else if (s->avctx->b_frame_strategy == 2) {
1337                 b_frames = estimate_best_b_count(s);
1338             } else {
1339                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1340                 b_frames = 0;
1341             }
1342
1343             emms_c();
1344             //static int b_count = 0;
1345             //b_count += b_frames;
1346             //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1347
1348             for (i = b_frames - 1; i >= 0; i--) {
1349                 int type = s->input_picture[i]->f.pict_type;
1350                 if (type && type != AV_PICTURE_TYPE_B)
1351                     b_frames = i;
1352             }
1353             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1354                 b_frames == s->max_b_frames) {
1355                 av_log(s->avctx, AV_LOG_ERROR,
1356                        "warning, too many b frames in a row\n");
1357             }
1358
1359             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1360                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1361                     s->gop_size > s->picture_in_gop_number) {
1362                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1363                 } else {
1364                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1365                         b_frames = 0;
1366                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1367                 }
1368             }
1369
1370             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1371                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1372                 b_frames--;
1373
1374             s->reordered_input_picture[0] = s->input_picture[b_frames];
1375             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1376                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1377             s->reordered_input_picture[0]->f.coded_picture_number =
1378                 s->coded_picture_number++;
1379             for (i = 0; i < b_frames; i++) {
1380                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1381                 s->reordered_input_picture[i + 1]->f.pict_type =
1382                     AV_PICTURE_TYPE_B;
1383                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1384                     s->coded_picture_number++;
1385             }
1386         }
1387     }
1388 no_output_pic:
1389     if (s->reordered_input_picture[0]) {
1390         s->reordered_input_picture[0]->f.reference =
1391            s->reordered_input_picture[0]->f.pict_type !=
1392                AV_PICTURE_TYPE_B ? 3 : 0;
1393
1394         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1395
1396         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1397             s->avctx->rc_buffer_size) {
1398             // input is a shared pix, so we can't modifiy it -> alloc a new
1399             // one & ensure that the shared one is reuseable
1400
1401             Picture *pic;
1402             int i = ff_find_unused_picture(s, 0);
1403             if (i < 0)
1404                 return i;
1405             pic = &s->picture[i];
1406
1407             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1408             if (ff_alloc_picture(s, pic, 0) < 0) {
1409                 return -1;
1410             }
1411
1412             /* mark us unused / free shared pic */
1413             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1414                 s->avctx->release_buffer(s->avctx,
1415                                          &s->reordered_input_picture[0]->f);
1416             for (i = 0; i < 4; i++)
1417                 s->reordered_input_picture[0]->f.data[i] = NULL;
1418             s->reordered_input_picture[0]->f.type = 0;
1419
1420             copy_picture_attributes(s, &pic->f,
1421                                     &s->reordered_input_picture[0]->f);
1422
1423             s->current_picture_ptr = pic;
1424         } else {
1425             // input is not a shared pix -> reuse buffer for current_pix
1426
1427             assert(s->reordered_input_picture[0]->f.type ==
1428                        FF_BUFFER_TYPE_USER ||
1429                    s->reordered_input_picture[0]->f.type ==
1430                        FF_BUFFER_TYPE_INTERNAL);
1431
1432             s->current_picture_ptr = s->reordered_input_picture[0];
1433             for (i = 0; i < 4; i++) {
1434                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1435             }
1436         }
1437         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1438
1439         s->picture_number = s->new_picture.f.display_picture_number;
1440         //printf("dpn:%d\n", s->picture_number);
1441     } else {
1442         memset(&s->new_picture, 0, sizeof(Picture));
1443     }
1444     return 0;
1445 }
1446
1447 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1448                           AVFrame *pic_arg, int *got_packet)
1449 {
1450     MpegEncContext *s = avctx->priv_data;
1451     int i, stuffing_count, ret;
1452     int context_count = s->slice_context_count;
1453
1454     s->picture_in_gop_number++;
1455
1456     if (load_input_picture(s, pic_arg) < 0)
1457         return -1;
1458
1459     if (select_input_picture(s) < 0) {
1460         return -1;
1461     }
1462
1463     /* output? */
1464     if (s->new_picture.f.data[0]) {
1465         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1466             return ret;
1467         if (s->mb_info) {
1468             s->mb_info_ptr = av_packet_new_side_data(pkt,
1469                                  AV_PKT_DATA_H263_MB_INFO,
1470                                  s->mb_width*s->mb_height*12);
1471             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1472         }
1473
1474         for (i = 0; i < context_count; i++) {
1475             int start_y = s->thread_context[i]->start_mb_y;
1476             int   end_y = s->thread_context[i]->  end_mb_y;
1477             int h       = s->mb_height;
1478             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1479             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1480
1481             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1482         }
1483
1484         s->pict_type = s->new_picture.f.pict_type;
1485         //emms_c();
1486         //printf("qs:%f %f %d\n", s->new_picture.quality,
1487         //       s->current_picture.quality, s->qscale);
1488         ff_MPV_frame_start(s, avctx);
1489 vbv_retry:
1490         if (encode_picture(s, s->picture_number) < 0)
1491             return -1;
1492
1493         avctx->header_bits = s->header_bits;
1494         avctx->mv_bits     = s->mv_bits;
1495         avctx->misc_bits   = s->misc_bits;
1496         avctx->i_tex_bits  = s->i_tex_bits;
1497         avctx->p_tex_bits  = s->p_tex_bits;
1498         avctx->i_count     = s->i_count;
1499         // FIXME f/b_count in avctx
1500         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1501         avctx->skip_count  = s->skip_count;
1502
1503         ff_MPV_frame_end(s);
1504
1505         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1506             ff_mjpeg_encode_picture_trailer(s);
1507
1508         if (avctx->rc_buffer_size) {
1509             RateControlContext *rcc = &s->rc_context;
1510             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1511
1512             if (put_bits_count(&s->pb) > max_size &&
1513                 s->lambda < s->avctx->lmax) {
1514                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1515                                        (s->qscale + 1) / s->qscale);
1516                 if (s->adaptive_quant) {
1517                     int i;
1518                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1519                         s->lambda_table[i] =
1520                             FFMAX(s->lambda_table[i] + 1,
1521                                   s->lambda_table[i] * (s->qscale + 1) /
1522                                   s->qscale);
1523                 }
1524                 s->mb_skipped = 0;        // done in MPV_frame_start()
1525                 // done in encode_picture() so we must undo it
1526                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1527                     if (s->flipflop_rounding          ||
1528                         s->codec_id == CODEC_ID_H263P ||
1529                         s->codec_id == CODEC_ID_MPEG4)
1530                         s->no_rounding ^= 1;
1531                 }
1532                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1533                     s->time_base       = s->last_time_base;
1534                     s->last_non_b_time = s->time - s->pp_time;
1535                 }
1536                 //av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
1537                 for (i = 0; i < context_count; i++) {
1538                     PutBitContext *pb = &s->thread_context[i]->pb;
1539                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1540                 }
1541                 goto vbv_retry;
1542             }
1543
1544             assert(s->avctx->rc_max_rate);
1545         }
1546
1547         if (s->flags & CODEC_FLAG_PASS1)
1548             ff_write_pass1_stats(s);
1549
1550         for (i = 0; i < 4; i++) {
1551             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1552             avctx->error[i] += s->current_picture_ptr->f.error[i];
1553         }
1554
1555         if (s->flags & CODEC_FLAG_PASS1)
1556             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1557                    avctx->i_tex_bits + avctx->p_tex_bits ==
1558                        put_bits_count(&s->pb));
1559         flush_put_bits(&s->pb);
1560         s->frame_bits  = put_bits_count(&s->pb);
1561
1562         stuffing_count = ff_vbv_update(s, s->frame_bits);
1563         if (stuffing_count) {
1564             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1565                     stuffing_count + 50) {
1566                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1567                 return -1;
1568             }
1569
1570             switch (s->codec_id) {
1571             case CODEC_ID_MPEG1VIDEO:
1572             case CODEC_ID_MPEG2VIDEO:
1573                 while (stuffing_count--) {
1574                     put_bits(&s->pb, 8, 0);
1575                 }
1576             break;
1577             case CODEC_ID_MPEG4:
1578                 put_bits(&s->pb, 16, 0);
1579                 put_bits(&s->pb, 16, 0x1C3);
1580                 stuffing_count -= 4;
1581                 while (stuffing_count--) {
1582                     put_bits(&s->pb, 8, 0xFF);
1583                 }
1584             break;
1585             default:
1586                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1587             }
1588             flush_put_bits(&s->pb);
1589             s->frame_bits  = put_bits_count(&s->pb);
1590         }
1591
1592         /* update mpeg1/2 vbv_delay for CBR */
1593         if (s->avctx->rc_max_rate                          &&
1594             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1595             s->out_format == FMT_MPEG1                     &&
1596             90000LL * (avctx->rc_buffer_size - 1) <=
1597                 s->avctx->rc_max_rate * 0xFFFFLL) {
1598             int vbv_delay, min_delay;
1599             double inbits  = s->avctx->rc_max_rate *
1600                              av_q2d(s->avctx->time_base);
1601             int    minbits = s->frame_bits - 8 *
1602                              (s->vbv_delay_ptr - s->pb.buf - 1);
1603             double bits    = s->rc_context.buffer_index + minbits - inbits;
1604
1605             if (bits < 0)
1606                 av_log(s->avctx, AV_LOG_ERROR,
1607                        "Internal error, negative bits\n");
1608
1609             assert(s->repeat_first_field == 0);
1610
1611             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1612             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1613                         s->avctx->rc_max_rate;
1614
1615             vbv_delay = FFMAX(vbv_delay, min_delay);
1616
1617             assert(vbv_delay < 0xFFFF);
1618
1619             s->vbv_delay_ptr[0] &= 0xF8;
1620             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1621             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1622             s->vbv_delay_ptr[2] &= 0x07;
1623             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1624             avctx->vbv_delay     = vbv_delay * 300;
1625         }
1626         s->total_bits     += s->frame_bits;
1627         avctx->frame_bits  = s->frame_bits;
1628
1629         pkt->pts = s->current_picture.f.pts;
1630         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1631             if (!s->current_picture.f.coded_picture_number)
1632                 pkt->dts = pkt->pts - s->dts_delta;
1633             else
1634                 pkt->dts = s->reordered_pts;
1635             s->reordered_pts = pkt->pts;
1636         } else
1637             pkt->dts = pkt->pts;
1638         if (s->current_picture.f.key_frame)
1639             pkt->flags |= AV_PKT_FLAG_KEY;
1640         if (s->mb_info)
1641             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1642     } else {
1643         assert((put_bits_ptr(&s->pb) == s->pb.buf));
1644         s->frame_bits = 0;
1645     }
1646     assert((s->frame_bits & 7) == 0);
1647
1648     pkt->size = s->frame_bits / 8;
1649     *got_packet = !!pkt->size;
1650     return 0;
1651 }
1652
1653 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1654                                                 int n, int threshold)
1655 {
1656     static const char tab[64] = {
1657         3, 2, 2, 1, 1, 1, 1, 1,
1658         1, 1, 1, 1, 1, 1, 1, 1,
1659         1, 1, 1, 1, 1, 1, 1, 1,
1660         0, 0, 0, 0, 0, 0, 0, 0,
1661         0, 0, 0, 0, 0, 0, 0, 0,
1662         0, 0, 0, 0, 0, 0, 0, 0,
1663         0, 0, 0, 0, 0, 0, 0, 0,
1664         0, 0, 0, 0, 0, 0, 0, 0
1665     };
1666     int score = 0;
1667     int run = 0;
1668     int i;
1669     DCTELEM *block = s->block[n];
1670     const int last_index = s->block_last_index[n];
1671     int skip_dc;
1672
1673     if (threshold < 0) {
1674         skip_dc = 0;
1675         threshold = -threshold;
1676     } else
1677         skip_dc = 1;
1678
1679     /* Are all we could set to zero already zero? */
1680     if (last_index <= skip_dc - 1)
1681         return;
1682
1683     for (i = 0; i <= last_index; i++) {
1684         const int j = s->intra_scantable.permutated[i];
1685         const int level = FFABS(block[j]);
1686         if (level == 1) {
1687             if (skip_dc && i == 0)
1688                 continue;
1689             score += tab[run];
1690             run = 0;
1691         } else if (level > 1) {
1692             return;
1693         } else {
1694             run++;
1695         }
1696     }
1697     if (score >= threshold)
1698         return;
1699     for (i = skip_dc; i <= last_index; i++) {
1700         const int j = s->intra_scantable.permutated[i];
1701         block[j] = 0;
1702     }
1703     if (block[0])
1704         s->block_last_index[n] = 0;
1705     else
1706         s->block_last_index[n] = -1;
1707 }
1708
1709 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1710                                int last_index)
1711 {
1712     int i;
1713     const int maxlevel = s->max_qcoeff;
1714     const int minlevel = s->min_qcoeff;
1715     int overflow = 0;
1716
1717     if (s->mb_intra) {
1718         i = 1; // skip clipping of intra dc
1719     } else
1720         i = 0;
1721
1722     for (; i <= last_index; i++) {
1723         const int j = s->intra_scantable.permutated[i];
1724         int level = block[j];
1725
1726         if (level > maxlevel) {
1727             level = maxlevel;
1728             overflow++;
1729         } else if (level < minlevel) {
1730             level = minlevel;
1731             overflow++;
1732         }
1733
1734         block[j] = level;
1735     }
1736
1737     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1738         av_log(s->avctx, AV_LOG_INFO,
1739                "warning, clipping %d dct coefficients to %d..%d\n",
1740                overflow, minlevel, maxlevel);
1741 }
1742
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For every pixel the sum and the sum of squares over its neighbourhood
 * (up to 3x3, cropped at the block border) are gathered, and the weight is
 * 36 * sqrt(count * sqr - sum * sum) / count.
 *
 * @param weight output table of 64 entries, row-major
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two pixel rows
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total   = 0;
            int squares = 0;
            int count   = 0;
            int r, c;

            for (r = top; r < bottom; r++) {
                for (c = left; c < right; c++) {
                    const int v = ptr[c + r * stride];

                    total   += v;
                    squares += v * v;
                    count++;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(count * squares - total * total)) / count;
        }
    }
}
1766
1767 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1768                                                 int motion_x, int motion_y,
1769                                                 int mb_block_height,
1770                                                 int mb_block_count)
1771 {
1772     int16_t weight[8][64];
1773     DCTELEM orig[8][64];
1774     const int mb_x = s->mb_x;
1775     const int mb_y = s->mb_y;
1776     int i;
1777     int skip_dct[8];
1778     int dct_offset = s->linesize * 8; // default for progressive frames
1779     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1780     int wrap_y, wrap_c;
1781
1782     for (i = 0; i < mb_block_count; i++)
1783         skip_dct[i] = s->skipdct;
1784
1785     if (s->adaptive_quant) {
1786         const int last_qp = s->qscale;
1787         const int mb_xy = mb_x + mb_y * s->mb_stride;
1788
1789         s->lambda = s->lambda_table[mb_xy];
1790         update_qscale(s);
1791
1792         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1793             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1794             s->dquant = s->qscale - last_qp;
1795
1796             if (s->out_format == FMT_H263) {
1797                 s->dquant = av_clip(s->dquant, -2, 2);
1798
1799                 if (s->codec_id == CODEC_ID_MPEG4) {
1800                     if (!s->mb_intra) {
1801                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1802                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1803                                 s->dquant = 0;
1804                         }
1805                         if (s->mv_type == MV_TYPE_8X8)
1806                             s->dquant = 0;
1807                     }
1808                 }
1809             }
1810         }
1811         ff_set_qscale(s, last_qp + s->dquant);
1812     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1813         ff_set_qscale(s, s->qscale + s->dquant);
1814
1815     wrap_y = s->linesize;
1816     wrap_c = s->uvlinesize;
1817     ptr_y  = s->new_picture.f.data[0] +
1818              (mb_y * 16 * wrap_y)              + mb_x * 16;
1819     ptr_cb = s->new_picture.f.data[1] +
1820              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1821     ptr_cr = s->new_picture.f.data[2] +
1822              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1823
1824     if((mb_x*16+16 > s->width || mb_y*16+16 > s->height) && s->codec_id != CODEC_ID_AMV){
1825         uint8_t *ebuf = s->edge_emu_buffer + 32;
1826         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1827                                 mb_y * 16, s->width, s->height);
1828         ptr_y = ebuf;
1829         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1830                                 mb_block_height, mb_x * 8, mb_y * 8,
1831                                 (s->width+1) >> 1, (s->height+1) >> 1);
1832         ptr_cb = ebuf + 18 * wrap_y;
1833         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1834                                 mb_block_height, mb_x * 8, mb_y * 8,
1835                                 (s->width+1) >> 1, (s->height+1) >> 1);
1836         ptr_cr = ebuf + 18 * wrap_y + 8;
1837     }
1838
1839     if (s->mb_intra) {
1840         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1841             int progressive_score, interlaced_score;
1842
1843             s->interlaced_dct = 0;
1844             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1845                                                     NULL, wrap_y, 8) +
1846                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1847                                                     NULL, wrap_y, 8) - 400;
1848
1849             if (progressive_score > 0) {
1850                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1851                                                        NULL, wrap_y * 2, 8) +
1852                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1853                                                        NULL, wrap_y * 2, 8);
1854                 if (progressive_score > interlaced_score) {
1855                     s->interlaced_dct = 1;
1856
1857                     dct_offset = wrap_y;
1858                     wrap_y <<= 1;
1859                     if (s->chroma_format == CHROMA_422)
1860                         wrap_c <<= 1;
1861                 }
1862             }
1863         }
1864
1865         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1866         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1867         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1868         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1869
1870         if (s->flags & CODEC_FLAG_GRAY) {
1871             skip_dct[4] = 1;
1872             skip_dct[5] = 1;
1873         } else {
1874             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1875             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1876             if (!s->chroma_y_shift) { /* 422 */
1877                 s->dsp.get_pixels(s->block[6],
1878                                   ptr_cb + (dct_offset >> 1), wrap_c);
1879                 s->dsp.get_pixels(s->block[7],
1880                                   ptr_cr + (dct_offset >> 1), wrap_c);
1881             }
1882         }
1883     } else {
1884         op_pixels_func (*op_pix)[4];
1885         qpel_mc_func (*op_qpix)[16];
1886         uint8_t *dest_y, *dest_cb, *dest_cr;
1887
1888         dest_y  = s->dest[0];
1889         dest_cb = s->dest[1];
1890         dest_cr = s->dest[2];
1891
1892         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1893             op_pix  = s->dsp.put_pixels_tab;
1894             op_qpix = s->dsp.put_qpel_pixels_tab;
1895         } else {
1896             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1897             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1898         }
1899
1900         if (s->mv_dir & MV_DIR_FORWARD) {
1901             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f.data,
1902                        op_pix, op_qpix);
1903             op_pix  = s->dsp.avg_pixels_tab;
1904             op_qpix = s->dsp.avg_qpel_pixels_tab;
1905         }
1906         if (s->mv_dir & MV_DIR_BACKWARD) {
1907             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f.data,
1908                        op_pix, op_qpix);
1909         }
1910
1911         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1912             int progressive_score, interlaced_score;
1913
1914             s->interlaced_dct = 0;
1915             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1916                                                     ptr_y,              wrap_y,
1917                                                     8) +
1918                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1919                                                     ptr_y + wrap_y * 8, wrap_y,
1920                                                     8) - 400;
1921
1922             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1923                 progressive_score -= 400;
1924
1925             if (progressive_score > 0) {
1926                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1927                                                        ptr_y,
1928                                                        wrap_y * 2, 8) +
1929                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1930                                                        ptr_y + wrap_y,
1931                                                        wrap_y * 2, 8);
1932
1933                 if (progressive_score > interlaced_score) {
1934                     s->interlaced_dct = 1;
1935
1936                     dct_offset = wrap_y;
1937                     wrap_y <<= 1;
1938                     if (s->chroma_format == CHROMA_422)
1939                         wrap_c <<= 1;
1940                 }
1941             }
1942         }
1943
1944         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1945         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1946         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1947                            dest_y + dct_offset, wrap_y);
1948         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1949                            dest_y + dct_offset + 8, wrap_y);
1950
1951         if (s->flags & CODEC_FLAG_GRAY) {
1952             skip_dct[4] = 1;
1953             skip_dct[5] = 1;
1954         } else {
1955             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1956             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1957             if (!s->chroma_y_shift) { /* 422 */
1958                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1959                                    dest_cb + (dct_offset >> 1), wrap_c);
1960                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1961                                    dest_cr + (dct_offset >> 1), wrap_c);
1962             }
1963         }
1964         /* pre quantization */
1965         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1966                 2 * s->qscale * s->qscale) {
1967             // FIXME optimize
1968             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1969                               wrap_y, 8) < 20 * s->qscale)
1970                 skip_dct[0] = 1;
1971             if (s->dsp.sad[1](NULL, ptr_y + 8,
1972                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1973                 skip_dct[1] = 1;
1974             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1975                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1976                 skip_dct[2] = 1;
1977             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1978                               dest_y + dct_offset + 8,
1979                               wrap_y, 8) < 20 * s->qscale)
1980                 skip_dct[3] = 1;
1981             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1982                               wrap_c, 8) < 20 * s->qscale)
1983                 skip_dct[4] = 1;
1984             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1985                               wrap_c, 8) < 20 * s->qscale)
1986                 skip_dct[5] = 1;
1987             if (!s->chroma_y_shift) { /* 422 */
1988                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1989                                   dest_cb + (dct_offset >> 1),
1990                                   wrap_c, 8) < 20 * s->qscale)
1991                     skip_dct[6] = 1;
1992                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1993                                   dest_cr + (dct_offset >> 1),
1994                                   wrap_c, 8) < 20 * s->qscale)
1995                     skip_dct[7] = 1;
1996             }
1997         }
1998     }
1999
2000     if (s->quantizer_noise_shaping) {
2001         if (!skip_dct[0])
2002             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2003         if (!skip_dct[1])
2004             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2005         if (!skip_dct[2])
2006             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2007         if (!skip_dct[3])
2008             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2009         if (!skip_dct[4])
2010             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2011         if (!skip_dct[5])
2012             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2013         if (!s->chroma_y_shift) { /* 422 */
2014             if (!skip_dct[6])
2015                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2016                                   wrap_c);
2017             if (!skip_dct[7])
2018                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2019                                   wrap_c);
2020         }
2021         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
2022     }
2023
2024     /* DCT & quantize */
2025     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2026     {
2027         for (i = 0; i < mb_block_count; i++) {
2028             if (!skip_dct[i]) {
2029                 int overflow;
2030                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2031                 // FIXME we could decide to change to quantizer instead of
2032                 // clipping
2033                 // JS: I don't think that would be a good idea it could lower
2034                 //     quality instead of improve it. Just INTRADC clipping
2035                 //     deserves changes in quantizer
2036                 if (overflow)
2037                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2038             } else
2039                 s->block_last_index[i] = -1;
2040         }
2041         if (s->quantizer_noise_shaping) {
2042             for (i = 0; i < mb_block_count; i++) {
2043                 if (!skip_dct[i]) {
2044                     s->block_last_index[i] =
2045                         dct_quantize_refine(s, s->block[i], weight[i],
2046                                             orig[i], i, s->qscale);
2047                 }
2048             }
2049         }
2050
2051         if (s->luma_elim_threshold && !s->mb_intra)
2052             for (i = 0; i < 4; i++)
2053                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2054         if (s->chroma_elim_threshold && !s->mb_intra)
2055             for (i = 4; i < mb_block_count; i++)
2056                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2057
2058         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2059             for (i = 0; i < mb_block_count; i++) {
2060                 if (s->block_last_index[i] == -1)
2061                     s->coded_score[i] = INT_MAX / 256;
2062             }
2063         }
2064     }
2065
2066     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2067         s->block_last_index[4] =
2068         s->block_last_index[5] = 0;
2069         s->block[4][0] =
2070         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2071     }
2072
2073     // non c quantize code returns incorrect block_last_index FIXME
2074     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2075         for (i = 0; i < mb_block_count; i++) {
2076             int j;
2077             if (s->block_last_index[i] > 0) {
2078                 for (j = 63; j > 0; j--) {
2079                     if (s->block[i][s->intra_scantable.permutated[j]])
2080                         break;
2081                 }
2082                 s->block_last_index[i] = j;
2083             }
2084         }
2085     }
2086
2087     /* huffman encode */
2088     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2089     case CODEC_ID_MPEG1VIDEO:
2090     case CODEC_ID_MPEG2VIDEO:
2091         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2092             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2093         break;
2094     case CODEC_ID_MPEG4:
2095         if (CONFIG_MPEG4_ENCODER)
2096             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2097         break;
2098     case CODEC_ID_MSMPEG4V2:
2099     case CODEC_ID_MSMPEG4V3:
2100     case CODEC_ID_WMV1:
2101         if (CONFIG_MSMPEG4_ENCODER)
2102             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2103         break;
2104     case CODEC_ID_WMV2:
2105         if (CONFIG_WMV2_ENCODER)
2106             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2107         break;
2108     case CODEC_ID_H261:
2109         if (CONFIG_H261_ENCODER)
2110             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2111         break;
2112     case CODEC_ID_H263:
2113     case CODEC_ID_H263P:
2114     case CODEC_ID_FLV1:
2115     case CODEC_ID_RV10:
2116     case CODEC_ID_RV20:
2117         if (CONFIG_H263_ENCODER)
2118             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2119         break;
2120     case CODEC_ID_MJPEG:
2121     case CODEC_ID_AMV:
2122         if (CONFIG_MJPEG_ENCODER)
2123             ff_mjpeg_encode_mb(s, s->block);
2124         break;
2125     default:
2126         assert(0);
2127     }
2128 }
2129
2130 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2131 {
2132     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2133     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2134 }
2135
2136 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2137     int i;
2138
2139     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2140
2141     /* mpeg1 */
2142     d->mb_skip_run= s->mb_skip_run;
2143     for(i=0; i<3; i++)
2144         d->last_dc[i] = s->last_dc[i];
2145
2146     /* statistics */
2147     d->mv_bits= s->mv_bits;
2148     d->i_tex_bits= s->i_tex_bits;
2149     d->p_tex_bits= s->p_tex_bits;
2150     d->i_count= s->i_count;
2151     d->f_count= s->f_count;
2152     d->b_count= s->b_count;
2153     d->skip_count= s->skip_count;
2154     d->misc_bits= s->misc_bits;
2155     d->last_bits= 0;
2156
2157     d->mb_skipped= 0;
2158     d->qscale= s->qscale;
2159     d->dquant= s->dquant;
2160
2161     d->esc3_level_length= s->esc3_level_length;
2162 }
2163
2164 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2165     int i;
2166
2167     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2168     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2169
2170     /* mpeg1 */
2171     d->mb_skip_run= s->mb_skip_run;
2172     for(i=0; i<3; i++)
2173         d->last_dc[i] = s->last_dc[i];
2174
2175     /* statistics */
2176     d->mv_bits= s->mv_bits;
2177     d->i_tex_bits= s->i_tex_bits;
2178     d->p_tex_bits= s->p_tex_bits;
2179     d->i_count= s->i_count;
2180     d->f_count= s->f_count;
2181     d->b_count= s->b_count;
2182     d->skip_count= s->skip_count;
2183     d->misc_bits= s->misc_bits;
2184
2185     d->mb_intra= s->mb_intra;
2186     d->mb_skipped= s->mb_skipped;
2187     d->mv_type= s->mv_type;
2188     d->mv_dir= s->mv_dir;
2189     d->pb= s->pb;
2190     if(s->data_partitioning){
2191         d->pb2= s->pb2;
2192         d->tex_pb= s->tex_pb;
2193     }
2194     d->block= s->block;
2195     for(i=0; i<8; i++)
2196         d->block_last_index[i]= s->block_last_index[i];
2197     d->interlaced_dct= s->interlaced_dct;
2198     d->qscale= s->qscale;
2199
2200     d->esc3_level_length= s->esc3_level_length;
2201 }
2202
2203 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2204                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2205                            int *dmin, int *next_block, int motion_x, int motion_y)
2206 {
2207     int score;
2208     uint8_t *dest_backup[3];
2209
2210     copy_context_before_encode(s, backup, type);
2211
2212     s->block= s->blocks[*next_block];
2213     s->pb= pb[*next_block];
2214     if(s->data_partitioning){
2215         s->pb2   = pb2   [*next_block];
2216         s->tex_pb= tex_pb[*next_block];
2217     }
2218
2219     if(*next_block){
2220         memcpy(dest_backup, s->dest, sizeof(s->dest));
2221         s->dest[0] = s->rd_scratchpad;
2222         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2223         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2224         assert(s->linesize >= 32); //FIXME
2225     }
2226
2227     encode_mb(s, motion_x, motion_y);
2228
2229     score= put_bits_count(&s->pb);
2230     if(s->data_partitioning){
2231         score+= put_bits_count(&s->pb2);
2232         score+= put_bits_count(&s->tex_pb);
2233     }
2234
2235     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2236         ff_MPV_decode_mb(s, s->block);
2237
2238         score *= s->lambda2;
2239         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2240     }
2241
2242     if(*next_block){
2243         memcpy(s->dest, dest_backup, sizeof(s->dest));
2244     }
2245
2246     if(score<*dmin){
2247         *dmin= score;
2248         *next_block^=1;
2249
2250         copy_context_after_encode(best, s, type);
2251     }
2252 }
2253
2254 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2255     uint32_t *sq = ff_squareTbl + 256;
2256     int acc=0;
2257     int x,y;
2258
2259     if(w==16 && h==16)
2260         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2261     else if(w==8 && h==8)
2262         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2263
2264     for(y=0; y<h; y++){
2265         for(x=0; x<w; x++){
2266             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2267         }
2268     }
2269
2270     assert(acc>=0);
2271
2272     return acc;
2273 }
2274
2275 static int sse_mb(MpegEncContext *s){
2276     int w= 16;
2277     int h= 16;
2278
2279     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2280     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2281
2282     if(w==16 && h==16)
2283       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2284         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2285                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2286                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2287       }else{
2288         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2289                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2290                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2291       }
2292     else
2293         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2294                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2295                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2296 }
2297
2298 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2299     MpegEncContext *s= *(void**)arg;
2300
2301
2302     s->me.pre_pass=1;
2303     s->me.dia_size= s->avctx->pre_dia_size;
2304     s->first_slice_line=1;
2305     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2306         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2307             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2308         }
2309         s->first_slice_line=0;
2310     }
2311
2312     s->me.pre_pass=0;
2313
2314     return 0;
2315 }
2316
2317 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2318     MpegEncContext *s= *(void**)arg;
2319
2320     ff_check_alignment();
2321
2322     s->me.dia_size= s->avctx->dia_size;
2323     s->first_slice_line=1;
2324     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2325         s->mb_x=0; //for block init below
2326         ff_init_block_index(s);
2327         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2328             s->block_index[0]+=2;
2329             s->block_index[1]+=2;
2330             s->block_index[2]+=2;
2331             s->block_index[3]+=2;
2332
2333             /* compute motion vector & mb_type and store in context */
2334             if(s->pict_type==AV_PICTURE_TYPE_B)
2335                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2336             else
2337                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2338         }
2339         s->first_slice_line=0;
2340     }
2341     return 0;
2342 }
2343
2344 static int mb_var_thread(AVCodecContext *c, void *arg){
2345     MpegEncContext *s= *(void**)arg;
2346     int mb_x, mb_y;
2347
2348     ff_check_alignment();
2349
2350     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2351         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2352             int xx = mb_x * 16;
2353             int yy = mb_y * 16;
2354             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2355             int varc;
2356             int sum = s->dsp.pix_sum(pix, s->linesize);
2357
2358             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2359
2360             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2361             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2362             s->me.mb_var_sum_temp    += varc;
2363         }
2364     }
2365     return 0;
2366 }
2367
2368 static void write_slice_end(MpegEncContext *s){
2369     if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4){
2370         if(s->partitioned_frame){
2371             ff_mpeg4_merge_partitions(s);
2372         }
2373
2374         ff_mpeg4_stuffing(&s->pb);
2375     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2376         ff_mjpeg_encode_stuffing(s);
2377     }
2378
2379     avpriv_align_put_bits(&s->pb);
2380     flush_put_bits(&s->pb);
2381
2382     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2383         s->misc_bits+= get_bits_diff(s);
2384 }
2385
2386 static void write_mb_info(MpegEncContext *s)
2387 {
2388     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2389     int offset = put_bits_count(&s->pb);
2390     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2391     int gobn = s->mb_y / s->gob_index;
2392     int pred_x, pred_y;
2393     if (CONFIG_H263_ENCODER)
2394         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2395     bytestream_put_le32(&ptr, offset);
2396     bytestream_put_byte(&ptr, s->qscale);
2397     bytestream_put_byte(&ptr, gobn);
2398     bytestream_put_le16(&ptr, mba);
2399     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2400     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2401     /* 4MV not implemented */
2402     bytestream_put_byte(&ptr, 0); /* hmv2 */
2403     bytestream_put_byte(&ptr, 0); /* vmv2 */
2404 }
2405
2406 static void update_mb_info(MpegEncContext *s, int startcode)
2407 {
2408     if (!s->mb_info)
2409         return;
2410     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2411         s->mb_info_size += 12;
2412         s->prev_mb_info = s->last_mb_info;
2413     }
2414     if (startcode) {
2415         s->prev_mb_info = put_bits_count(&s->pb)/8;
2416         /* This might have incremented mb_info_size above, and we return without
2417          * actually writing any info into that slot yet. But in that case,
2418          * this will be called again at the start of the after writing the
2419          * start code, actually writing the mb info. */
2420         return;
2421     }
2422
2423     s->last_mb_info = put_bits_count(&s->pb)/8;
2424     if (!s->mb_info_size)
2425         s->mb_info_size += 12;
2426     write_mb_info(s);
2427 }
2428
2429 static int encode_thread(AVCodecContext *c, void *arg){
2430     MpegEncContext *s= *(void**)arg;
2431     int mb_x, mb_y, pdif = 0;
2432     int chr_h= 16>>s->chroma_y_shift;
2433     int i, j;
2434     MpegEncContext best_s, backup_s;
2435     uint8_t bit_buf[2][MAX_MB_BYTES];
2436     uint8_t bit_buf2[2][MAX_MB_BYTES];
2437     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2438     PutBitContext pb[2], pb2[2], tex_pb[2];
2439 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2440
2441     ff_check_alignment();
2442
2443     for(i=0; i<2; i++){
2444         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2445         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2446         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2447     }
2448
2449     s->last_bits= put_bits_count(&s->pb);
2450     s->mv_bits=0;
2451     s->misc_bits=0;
2452     s->i_tex_bits=0;
2453     s->p_tex_bits=0;
2454     s->i_count=0;
2455     s->f_count=0;
2456     s->b_count=0;
2457     s->skip_count=0;
2458
2459     for(i=0; i<3; i++){
2460         /* init last dc values */
2461         /* note: quant matrix value (8) is implied here */
2462         s->last_dc[i] = 128 << s->intra_dc_precision;
2463
2464         s->current_picture.f.error[i] = 0;
2465     }
2466     if(s->codec_id==CODEC_ID_AMV){
2467         s->last_dc[0] = 128*8/13;
2468         s->last_dc[1] = 128*8/14;
2469         s->last_dc[2] = 128*8/14;
2470     }
2471     s->mb_skip_run = 0;
2472     memset(s->last_mv, 0, sizeof(s->last_mv));
2473
2474     s->last_mv_dir = 0;
2475
2476     switch(s->codec_id){
2477     case CODEC_ID_H263:
2478     case CODEC_ID_H263P:
2479     case CODEC_ID_FLV1:
2480         if (CONFIG_H263_ENCODER)
2481             s->gob_index = ff_h263_get_gob_height(s);
2482         break;
2483     case CODEC_ID_MPEG4:
2484         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2485             ff_mpeg4_init_partitions(s);
2486         break;
2487     }
2488
2489     s->resync_mb_x=0;
2490     s->resync_mb_y=0;
2491     s->first_slice_line = 1;
2492     s->ptr_lastgob = s->pb.buf;
2493     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2494 //    printf("row %d at %X\n", s->mb_y, (int)s);
2495         s->mb_x=0;
2496         s->mb_y= mb_y;
2497
2498         ff_set_qscale(s, s->qscale);
2499         ff_init_block_index(s);
2500
2501         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2502             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2503             int mb_type= s->mb_type[xy];
2504 //            int d;
2505             int dmin= INT_MAX;
2506             int dir;
2507
2508             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2509                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2510                 return -1;
2511             }
2512             if(s->data_partitioning){
2513                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2514                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2515                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2516                     return -1;
2517                 }
2518             }
2519
2520             s->mb_x = mb_x;
2521             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2522             ff_update_block_index(s);
2523
2524             if(CONFIG_H261_ENCODER && s->codec_id == CODEC_ID_H261){
2525                 ff_h261_reorder_mb_index(s);
2526                 xy= s->mb_y*s->mb_stride + s->mb_x;
2527                 mb_type= s->mb_type[xy];
2528             }
2529
2530             /* write gob / video packet header  */
2531             if(s->rtp_mode){
2532                 int current_packet_size, is_gob_start;
2533
2534                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2535
2536                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2537
2538                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2539
2540                 switch(s->codec_id){
2541                 case CODEC_ID_H263:
2542                 case CODEC_ID_H263P:
2543                     if(!s->h263_slice_structured)
2544                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2545                     break;
2546                 case CODEC_ID_MPEG2VIDEO:
2547                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2548                 case CODEC_ID_MPEG1VIDEO:
2549                     if(s->mb_skip_run) is_gob_start=0;
2550                     break;
2551                 case CODEC_ID_MJPEG:
2552                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2553                     break;
2554                 }
2555
2556                 if(is_gob_start){
2557                     if(s->start_mb_y != mb_y || mb_x!=0){
2558                         write_slice_end(s);
2559                         if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
2560                             ff_mpeg4_init_partitions(s);
2561                         }
2562                     }
2563
2564                     assert((put_bits_count(&s->pb)&7) == 0);
2565                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2566
2567                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2568                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2569                         int d= 100 / s->avctx->error_rate;
2570                         if(r % d == 0){
2571                             current_packet_size=0;
2572                             s->pb.buf_ptr= s->ptr_lastgob;
2573                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2574                         }
2575                     }
2576
2577                     if (s->avctx->rtp_callback){
2578                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2579                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2580                     }
2581                     update_mb_info(s, 1);
2582
2583                     switch(s->codec_id){
2584                     case CODEC_ID_MPEG4:
2585                         if (CONFIG_MPEG4_ENCODER) {
2586                             ff_mpeg4_encode_video_packet_header(s);
2587                             ff_mpeg4_clean_buffers(s);
2588                         }
2589                     break;
2590                     case CODEC_ID_MPEG1VIDEO:
2591                     case CODEC_ID_MPEG2VIDEO:
2592                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2593                             ff_mpeg1_encode_slice_header(s);
2594                             ff_mpeg1_clean_buffers(s);
2595                         }
2596                     break;
2597                     case CODEC_ID_H263:
2598                     case CODEC_ID_H263P:
2599                         if (CONFIG_H263_ENCODER)
2600                             ff_h263_encode_gob_header(s, mb_y);
2601                     break;
2602                     }
2603
2604                     if(s->flags&CODEC_FLAG_PASS1){
2605                         int bits= put_bits_count(&s->pb);
2606                         s->misc_bits+= bits - s->last_bits;
2607                         s->last_bits= bits;
2608                     }
2609
2610                     s->ptr_lastgob += current_packet_size;
2611                     s->first_slice_line=1;
2612                     s->resync_mb_x=mb_x;
2613                     s->resync_mb_y=mb_y;
2614                 }
2615             }
2616
2617             if(  (s->resync_mb_x   == s->mb_x)
2618                && s->resync_mb_y+1 == s->mb_y){
2619                 s->first_slice_line=0;
2620             }
2621
2622             s->mb_skipped=0;
2623             s->dquant=0; //only for QP_RD
2624
2625             update_mb_info(s, 0);
2626
2627             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2628                 int next_block=0;
2629                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2630
2631                 copy_context_before_encode(&backup_s, s, -1);
2632                 backup_s.pb= s->pb;
2633                 best_s.data_partitioning= s->data_partitioning;
2634                 best_s.partitioned_frame= s->partitioned_frame;
2635                 if(s->data_partitioning){
2636                     backup_s.pb2= s->pb2;
2637                     backup_s.tex_pb= s->tex_pb;
2638                 }
2639
2640                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2641                     s->mv_dir = MV_DIR_FORWARD;
2642                     s->mv_type = MV_TYPE_16X16;
2643                     s->mb_intra= 0;
2644                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2645                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2646                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2647                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2648                 }
2649                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2650                     s->mv_dir = MV_DIR_FORWARD;
2651                     s->mv_type = MV_TYPE_FIELD;
2652                     s->mb_intra= 0;
2653                     for(i=0; i<2; i++){
2654                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2655                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2656                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2657                     }
2658                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2659                                  &dmin, &next_block, 0, 0);
2660                 }
2661                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2662                     s->mv_dir = MV_DIR_FORWARD;
2663                     s->mv_type = MV_TYPE_16X16;
2664                     s->mb_intra= 0;
2665                     s->mv[0][0][0] = 0;
2666                     s->mv[0][0][1] = 0;
2667                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2668                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2669                 }
2670                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2671                     s->mv_dir = MV_DIR_FORWARD;
2672                     s->mv_type = MV_TYPE_8X8;
2673                     s->mb_intra= 0;
2674                     for(i=0; i<4; i++){
2675                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2676                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2677                     }
2678                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2679                                  &dmin, &next_block, 0, 0);
2680                 }
2681                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2682                     s->mv_dir = MV_DIR_FORWARD;
2683                     s->mv_type = MV_TYPE_16X16;
2684                     s->mb_intra= 0;
2685                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2686                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2687                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2688                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2689                 }
2690                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2691                     s->mv_dir = MV_DIR_BACKWARD;
2692                     s->mv_type = MV_TYPE_16X16;
2693                     s->mb_intra= 0;
2694                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2695                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2696                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2697                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2698                 }
2699                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2700                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2701                     s->mv_type = MV_TYPE_16X16;
2702                     s->mb_intra= 0;
2703                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2704                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2705                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2706                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2707                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2708                                  &dmin, &next_block, 0, 0);
2709                 }
2710                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2711                     s->mv_dir = MV_DIR_FORWARD;
2712                     s->mv_type = MV_TYPE_FIELD;
2713                     s->mb_intra= 0;
2714                     for(i=0; i<2; i++){
2715                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2716                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2717                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2718                     }
2719                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2720                                  &dmin, &next_block, 0, 0);
2721                 }
2722                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2723                     s->mv_dir = MV_DIR_BACKWARD;
2724                     s->mv_type = MV_TYPE_FIELD;
2725                     s->mb_intra= 0;
2726                     for(i=0; i<2; i++){
2727                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2728                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2729                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2730                     }
2731                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2732                                  &dmin, &next_block, 0, 0);
2733                 }
2734                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2735                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2736                     s->mv_type = MV_TYPE_FIELD;
2737                     s->mb_intra= 0;
2738                     for(dir=0; dir<2; dir++){
2739                         for(i=0; i<2; i++){
2740                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2741                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2742                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2743                         }
2744                     }
2745                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2746                                  &dmin, &next_block, 0, 0);
2747                 }
2748                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2749                     s->mv_dir = 0;
2750                     s->mv_type = MV_TYPE_16X16;
2751                     s->mb_intra= 1;
2752                     s->mv[0][0][0] = 0;
2753                     s->mv[0][0][1] = 0;
2754                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2755                                  &dmin, &next_block, 0, 0);
2756                     if(s->h263_pred || s->h263_aic){
2757                         if(best_s.mb_intra)
2758                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2759                         else
2760                             ff_clean_intra_table_entries(s); //old mode?
2761                     }
2762                 }
2763
2764                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2765                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2766                         const int last_qp= backup_s.qscale;
2767                         int qpi, qp, dc[6];
2768                         DCTELEM ac[6][16];
2769                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2770                         static const int dquant_tab[4]={-1,1,-2,2};
2771
2772                         assert(backup_s.dquant == 0);
2773
2774                         //FIXME intra
2775                         s->mv_dir= best_s.mv_dir;
2776                         s->mv_type = MV_TYPE_16X16;
2777                         s->mb_intra= best_s.mb_intra;
2778                         s->mv[0][0][0] = best_s.mv[0][0][0];
2779                         s->mv[0][0][1] = best_s.mv[0][0][1];
2780                         s->mv[1][0][0] = best_s.mv[1][0][0];
2781                         s->mv[1][0][1] = best_s.mv[1][0][1];
2782
2783                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2784                         for(; qpi<4; qpi++){
2785                             int dquant= dquant_tab[qpi];
2786                             qp= last_qp + dquant;
2787                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2788                                 continue;
2789                             backup_s.dquant= dquant;
2790                             if(s->mb_intra && s->dc_val[0]){
2791                                 for(i=0; i<6; i++){
2792                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2793                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2794                                 }
2795                             }
2796
2797                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2798                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2799                             if(best_s.qscale != qp){
2800                                 if(s->mb_intra && s->dc_val[0]){
2801                                     for(i=0; i<6; i++){
2802                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2803                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2804                                     }
2805                                 }
2806                             }
2807                         }
2808                     }
2809                 }
2810                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2811                     int mx= s->b_direct_mv_table[xy][0];
2812                     int my= s->b_direct_mv_table[xy][1];
2813
2814                     backup_s.dquant = 0;
2815                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2816                     s->mb_intra= 0;
2817                     ff_mpeg4_set_direct_mv(s, mx, my);
2818                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2819                                  &dmin, &next_block, mx, my);
2820                 }
2821                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2822                     backup_s.dquant = 0;
2823                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2824                     s->mb_intra= 0;
2825                     ff_mpeg4_set_direct_mv(s, 0, 0);
2826                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2827                                  &dmin, &next_block, 0, 0);
2828                 }
2829                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2830                     int coded=0;
2831                     for(i=0; i<6; i++)
2832                         coded |= s->block_last_index[i];
2833                     if(coded){
2834                         int mx,my;
2835                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2836                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2837                             mx=my=0; //FIXME find the one we actually used
2838                             ff_mpeg4_set_direct_mv(s, mx, my);
2839                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2840                             mx= s->mv[1][0][0];
2841                             my= s->mv[1][0][1];
2842                         }else{
2843                             mx= s->mv[0][0][0];
2844                             my= s->mv[0][0][1];
2845                         }
2846
2847                         s->mv_dir= best_s.mv_dir;
2848                         s->mv_type = best_s.mv_type;
2849                         s->mb_intra= 0;
2850 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2851                         s->mv[0][0][1] = best_s.mv[0][0][1];
2852                         s->mv[1][0][0] = best_s.mv[1][0][0];
2853                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2854                         backup_s.dquant= 0;
2855                         s->skipdct=1;
2856                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2857                                         &dmin, &next_block, mx, my);
2858                         s->skipdct=0;
2859                     }
2860                 }
2861
2862                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2863
2864                 copy_context_after_encode(s, &best_s, -1);
2865
2866                 pb_bits_count= put_bits_count(&s->pb);
2867                 flush_put_bits(&s->pb);
2868                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2869                 s->pb= backup_s.pb;
2870
2871                 if(s->data_partitioning){
2872                     pb2_bits_count= put_bits_count(&s->pb2);
2873                     flush_put_bits(&s->pb2);
2874                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2875                     s->pb2= backup_s.pb2;
2876
2877                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2878                     flush_put_bits(&s->tex_pb);
2879                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2880                     s->tex_pb= backup_s.tex_pb;
2881                 }
2882                 s->last_bits= put_bits_count(&s->pb);
2883
2884                 if (CONFIG_H263_ENCODER &&
2885                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2886                     ff_h263_update_motion_val(s);
2887
2888                 if(next_block==0){ //FIXME 16 vs linesize16
2889                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2890                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2891                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2892                 }
2893
2894                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2895                     ff_MPV_decode_mb(s, s->block);
2896             } else {
2897                 int motion_x = 0, motion_y = 0;
2898                 s->mv_type=MV_TYPE_16X16;
2899                 // only one MB-Type possible
2900
2901                 switch(mb_type){
2902                 case CANDIDATE_MB_TYPE_INTRA:
2903                     s->mv_dir = 0;
2904                     s->mb_intra= 1;
2905                     motion_x= s->mv[0][0][0] = 0;
2906                     motion_y= s->mv[0][0][1] = 0;
2907                     break;
2908                 case CANDIDATE_MB_TYPE_INTER:
2909                     s->mv_dir = MV_DIR_FORWARD;
2910                     s->mb_intra= 0;
2911                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2912                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2913                     break;
2914                 case CANDIDATE_MB_TYPE_INTER_I:
2915                     s->mv_dir = MV_DIR_FORWARD;
2916                     s->mv_type = MV_TYPE_FIELD;
2917                     s->mb_intra= 0;
2918                     for(i=0; i<2; i++){
2919                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2920                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2921                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2922                     }
2923                     break;
2924                 case CANDIDATE_MB_TYPE_INTER4V:
2925                     s->mv_dir = MV_DIR_FORWARD;
2926                     s->mv_type = MV_TYPE_8X8;
2927                     s->mb_intra= 0;
2928                     for(i=0; i<4; i++){
2929                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2930                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2931                     }
2932                     break;
2933                 case CANDIDATE_MB_TYPE_DIRECT:
2934                     if (CONFIG_MPEG4_ENCODER) {
2935                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2936                         s->mb_intra= 0;
2937                         motion_x=s->b_direct_mv_table[xy][0];
2938                         motion_y=s->b_direct_mv_table[xy][1];
2939                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2940                     }
2941                     break;
2942                 case CANDIDATE_MB_TYPE_DIRECT0:
2943                     if (CONFIG_MPEG4_ENCODER) {
2944                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2945                         s->mb_intra= 0;
2946                         ff_mpeg4_set_direct_mv(s, 0, 0);
2947                     }
2948                     break;
2949                 case CANDIDATE_MB_TYPE_BIDIR:
2950                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2951                     s->mb_intra= 0;
2952                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2953                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2954                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2955                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2956                     break;
2957                 case CANDIDATE_MB_TYPE_BACKWARD:
2958                     s->mv_dir = MV_DIR_BACKWARD;
2959                     s->mb_intra= 0;
2960                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2961                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2962                     break;
2963                 case CANDIDATE_MB_TYPE_FORWARD:
2964                     s->mv_dir = MV_DIR_FORWARD;
2965                     s->mb_intra= 0;
2966                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2967                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2968 //                    printf(" %d %d ", motion_x, motion_y);
2969                     break;
2970                 case CANDIDATE_MB_TYPE_FORWARD_I:
2971                     s->mv_dir = MV_DIR_FORWARD;
2972                     s->mv_type = MV_TYPE_FIELD;
2973                     s->mb_intra= 0;
2974                     for(i=0; i<2; i++){
2975                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2976                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2977                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2978                     }
2979                     break;
2980                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2981                     s->mv_dir = MV_DIR_BACKWARD;
2982                     s->mv_type = MV_TYPE_FIELD;
2983                     s->mb_intra= 0;
2984                     for(i=0; i<2; i++){
2985                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2986                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2987                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2988                     }
2989                     break;
2990                 case CANDIDATE_MB_TYPE_BIDIR_I:
2991                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2992                     s->mv_type = MV_TYPE_FIELD;
2993                     s->mb_intra= 0;
2994                     for(dir=0; dir<2; dir++){
2995                         for(i=0; i<2; i++){
2996                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2997                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2998                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2999                         }
3000                     }
3001                     break;
3002                 default:
3003                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3004                 }
3005
3006                 encode_mb(s, motion_x, motion_y);
3007
3008                 // RAL: Update last macroblock type
3009                 s->last_mv_dir = s->mv_dir;
3010
3011                 if (CONFIG_H263_ENCODER &&
3012                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3013                     ff_h263_update_motion_val(s);
3014
3015                 ff_MPV_decode_mb(s, s->block);
3016             }
3017
3018             /* clean the MV table in IPS frames for direct mode in B frames */
3019             if(s->mb_intra /* && I,P,S_TYPE */){
3020                 s->p_mv_table[xy][0]=0;
3021                 s->p_mv_table[xy][1]=0;
3022             }
3023
3024             if(s->flags&CODEC_FLAG_PSNR){
3025                 int w= 16;
3026                 int h= 16;
3027
3028                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3029                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3030
3031                 s->current_picture.f.error[0] += sse(
3032                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3033                     s->dest[0], w, h, s->linesize);
3034                 s->current_picture.f.error[1] += sse(
3035                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3036                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3037                 s->current_picture.f.error[2] += sse(
3038                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3039                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3040             }
3041             if(s->loop_filter){
3042                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3043                     ff_h263_loop_filter(s);
3044             }
3045 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
3046         }
3047     }
3048
3049     //not beautiful here but we must write it before flushing so it has to be here
3050     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3051         ff_msmpeg4_encode_ext_header(s);
3052
3053     write_slice_end(s);
3054
3055     /* Send the last GOB if RTP */
3056     if (s->avctx->rtp_callback) {
3057         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3058         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3059         /* Call the RTP callback to send the last GOB */
3060         emms_c();
3061         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3062     }
3063
3064     return 0;
3065 }
3066
3067 #define MERGE(field) dst->field += src->field; src->field=0
3068 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3069     MERGE(me.scene_change_score);
3070     MERGE(me.mc_mb_var_sum_temp);
3071     MERGE(me.mb_var_sum_temp);
3072 }
3073
3074 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3075     int i;
3076
3077     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3078     MERGE(dct_count[1]);
3079     MERGE(mv_bits);
3080     MERGE(i_tex_bits);
3081     MERGE(p_tex_bits);
3082     MERGE(i_count);
3083     MERGE(f_count);
3084     MERGE(b_count);
3085     MERGE(skip_count);
3086     MERGE(misc_bits);
3087     MERGE(error_count);
3088     MERGE(padding_bug_score);
3089     MERGE(current_picture.f.error[0]);
3090     MERGE(current_picture.f.error[1]);
3091     MERGE(current_picture.f.error[2]);
3092
3093     if(dst->avctx->noise_reduction){
3094         for(i=0; i<64; i++){
3095             MERGE(dct_error_sum[0][i]);
3096             MERGE(dct_error_sum[1][i]);
3097         }
3098     }
3099
3100     assert(put_bits_count(&src->pb) % 8 ==0);
3101     assert(put_bits_count(&dst->pb) % 8 ==0);
3102     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3103     flush_put_bits(&dst->pb);
3104 }
3105
3106 static int estimate_qp(MpegEncContext *s, int dry_run){
3107     if (s->next_lambda){
3108         s->current_picture_ptr->f.quality =
3109         s->current_picture.f.quality = s->next_lambda;
3110         if(!dry_run) s->next_lambda= 0;
3111     } else if (!s->fixed_qscale) {
3112         s->current_picture_ptr->f.quality =
3113         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3114         if (s->current_picture.f.quality < 0)
3115             return -1;
3116     }
3117
3118     if(s->adaptive_quant){
3119         switch(s->codec_id){
3120         case CODEC_ID_MPEG4:
3121             if (CONFIG_MPEG4_ENCODER)
3122                 ff_clean_mpeg4_qscales(s);
3123             break;
3124         case CODEC_ID_H263:
3125         case CODEC_ID_H263P:
3126         case CODEC_ID_FLV1:
3127             if (CONFIG_H263_ENCODER)
3128                 ff_clean_h263_qscales(s);
3129             break;
3130         default:
3131             ff_init_qscale_tab(s);
3132         }
3133
3134         s->lambda= s->lambda_table[0];
3135         //FIXME broken
3136     }else
3137         s->lambda = s->current_picture.f.quality;
3138 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3139     update_qscale(s);
3140     return 0;
3141 }
3142
3143 /* must be called before writing the header */
3144 static void set_frame_distances(MpegEncContext * s){
3145     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3146     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3147
3148     if(s->pict_type==AV_PICTURE_TYPE_B){
3149         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3150         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3151     }else{
3152         s->pp_time= s->time - s->last_non_b_time;
3153         s->last_non_b_time= s->time;
3154         assert(s->picture_number==0 || s->pp_time > 0);
3155     }
3156 }
3157
/**
 * Encode one picture.
 *
 * Steps, in order: set up time stamps and rounding, pick a lambda for motion
 * estimation, run motion estimation (or the variance pass for I pictures) on
 * all slice contexts, optionally re-type a P picture as I on a scene change,
 * choose f_code/b_code, settle the final quality via estimate_qp(), write the
 * format specific picture header and finally run encode_thread on all slice
 * contexts and merge their results into s.
 *
 * @param s              main encoder context
 * @param picture_number stored in s->picture_number and handed to most of
 *                       the picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails
 */
3158 static int encode_picture(MpegEncContext *s, int picture_number)
3159 {
3160     int i;
3161     int bits;
3162     int context_count = s->slice_context_count;
3163
3164     s->picture_number = picture_number;
3165
3166     /* Reset the average MB variance */
3167     s->me.mb_var_sum_temp    =
3168     s->me.mc_mb_var_sum_temp = 0;
3169
3170     /* we need to initialize some time vars before we can encode b-frames */
3171     // RAL: Condition added for MPEG1VIDEO
3172     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3173         set_frame_distances(s);
3174     if(CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4)
3175         ff_set_mpeg4_time(s);
3176
3177     s->me.scene_change_score=0;
3178
3179 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3180
         /* Rounding control: I pictures set no_rounding to a fixed value,
          * other non-B pictures toggle it for the codecs listed below,
          * B pictures leave it untouched. */
3181     if(s->pict_type==AV_PICTURE_TYPE_I){
3182         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3183         else                        s->no_rounding=0;
3184     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3185         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
3186             s->no_rounding ^= 1;
3187     }
3188
         /* Lambda used during motion estimation: in the second pass a dry run
          * of the rate control provides it; otherwise, unless the qscale flag
          * is set, the last lambda stored for this picture class is reused. */
3189     if(s->flags & CODEC_FLAG_PASS2){
3190         if (estimate_qp(s,1) < 0)
3191             return -1;
3192         ff_get_2pass_fcode(s);
3193     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3194         if(s->pict_type==AV_PICTURE_TYPE_B)
3195             s->lambda= s->last_lambda_for[s->pict_type];
3196         else
3197             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3198         update_qscale(s);
3199     }
3200
         /* Every codec except AMV uses the luma intra tables for chroma too;
          * chroma tables that are distinct from the luma ones are freed
          * before the pointers are aliased. */
3201     if(s->codec_id != CODEC_ID_AMV){
3202         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3203         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3204         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3205         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3206     }
3207
3208     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* bring the additional slice contexts (index 1 and up) in sync with s */
3209     for(i=1; i<context_count; i++){
3210         ff_update_duplicate_context(s->thread_context[i], s);
3211     }
3212
3213     if(ff_init_me(s)<0)
3214         return -1;
3215
3216     /* Estimate motion for every MB */
3217     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3218         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3219         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
             /* optional pre-pass: only for non-B pictures with me_threshold 0,
              * and only if pre_me is 2 or (pre_me set and the previous non-B
              * picture was an I picture) */
3220         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3221             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3222                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3223             }
3224         }
3225
3226         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3227     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3228         /* I-Frame */
3229         for(i=0; i<s->mb_stride*s->mb_height; i++)
3230             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3231
3232         if(!s->fixed_qscale){
3233             /* finding spatial complexity for I-frame rate control */
3234             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3235         }
3236     }
         /* gather the per-context ME statistics in s and publish the sums */
3237     for(i=1; i<context_count; i++){
3238         merge_context_after_me(s, s->thread_context[i]);
3239     }
3240     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3241     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3242     emms_c();
3243
         /* scene change: a P picture whose score exceeds the threshold is
          * coded as an I picture with all macroblocks intra */
3244     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3245         s->pict_type= AV_PICTURE_TYPE_I;
3246         for(i=0; i<s->mb_stride*s->mb_height; i++)
3247             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3248 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3249         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3250     }
3251
         /* Unless umvplus is set, choose f_code/b_code from the motion vector
          * tables and pass every table through ff_fix_long_mvs() with the
          * chosen code (presumably to deal with vectors that do not fit that
          * range - see the helper for the exact behaviour). */
3252     if(!s->umvplus){
3253         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3254             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3255
3256             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3257                 int a,b;
3258                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3259                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3260                 s->f_code= FFMAX3(s->f_code, a, b);
3261             }
3262
3263             ff_fix_long_p_mvs(s);
3264             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3265             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3266                 int j;
3267                 for(i=0; i<2; i++){
3268                     for(j=0; j<2; j++)
3269                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3270                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3271                 }
3272             }
3273         }
3274
3275         if(s->pict_type==AV_PICTURE_TYPE_B){
3276             int a, b;
3277
                 /* f_code covers the forward tables, b_code the backward ones;
                  * each is the maximum over the plain and the bidir table */
3278             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3279             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3280             s->f_code = FFMAX(a, b);
3281
3282             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3283             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3284             s->b_code = FFMAX(a, b);
3285
3286             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3287             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3288             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3289             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3290             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3291                 int dir, j;
3292                 for(dir=0; dir<2; dir++){
3293                     for(i=0; i<2; i++){
3294                         for(j=0; j<2; j++){
3295                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3296                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3297                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3298                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3299                         }
3300                     }
3301                 }
3302             }
3303         }
3304     }
3305
         /* final (non dry-run) quality decision for this picture */
3306     if (estimate_qp(s, 0) < 0)
3307         return -1;
3308
3309     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3310         s->qscale= 3; //reduce clipping problems
3311
3312     if (s->out_format == FMT_MJPEG) {
3313         /* for mjpeg, we do include qscale in the matrix */
3314         for(i=1;i<64;i++){
3315             int j= s->dsp.idct_permutation[i];
3316
3317             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3318         }
3319         s->y_dc_scale_table=
3320         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3321         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3322         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3323                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* the matrix already contains the quantiser, so qscale is pinned to 8 */
3324         s->qscale= 8;
3325     }
3326     if(s->codec_id == CODEC_ID_AMV){
             /* AMV: constant DC scale tables (13 for luma, 14 for chroma) and
              * separate luma/chroma matrices taken from sp5x_quant_table */
3327         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3328         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3329         for(i=1;i<64;i++){
3330             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3331
3332             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3333             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3334         }
3335         s->y_dc_scale_table= y;
3336         s->c_dc_scale_table= c;
3337         s->intra_matrix[0] = 13;
3338         s->chroma_intra_matrix[0] = 14;
3339         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3340                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3341         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3342                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3343         s->qscale= 8;
3344     }
3345
3346     //FIXME var duplication
3347     s->current_picture_ptr->f.key_frame =
3348     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3349     s->current_picture_ptr->f.pict_type =
3350     s->current_picture.f.pict_type = s->pict_type;
3351
3352     if (s->current_picture.f.key_frame)
3353         s->picture_in_gop_number=0;
3354
         /* remember the bit position so the header size can be measured */
3355     s->last_bits= put_bits_count(&s->pb);
3356     switch(s->out_format) {
3357     case FMT_MJPEG:
3358         if (CONFIG_MJPEG_ENCODER)
3359             ff_mjpeg_encode_picture_header(s);
3360         break;
3361     case FMT_H261:
3362         if (CONFIG_H261_ENCODER)
3363             ff_h261_encode_picture_header(s, picture_number);
3364         break;
3365     case FMT_H263:
             /* several codecs share FMT_H263; the first matching writer wins,
              * plain H.263 is the fallback */
3366         if (CONFIG_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
3367             ff_wmv2_encode_picture_header(s, picture_number);
3368         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3369             ff_msmpeg4_encode_picture_header(s, picture_number);
3370         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3371             ff_mpeg4_encode_picture_header(s, picture_number);
3372         else if (CONFIG_RV10_ENCODER && s->codec_id == CODEC_ID_RV10)
3373             ff_rv10_encode_picture_header(s, picture_number);
3374         else if (CONFIG_RV20_ENCODER && s->codec_id == CODEC_ID_RV20)
3375             ff_rv20_encode_picture_header(s, picture_number);
3376         else if (CONFIG_FLV_ENCODER && s->codec_id == CODEC_ID_FLV1)
3377             ff_flv_encode_picture_header(s, picture_number);
3378         else if (CONFIG_H263_ENCODER)
3379             ff_h263_encode_picture_header(s, picture_number);
3380         break;
3381     case FMT_MPEG1:
3382         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3383             ff_mpeg1_encode_picture_header(s, picture_number);
3384         break;
3385     case FMT_H264:
3386         break;
3387     default:
3388         assert(0);
3389     }
3390     bits= put_bits_count(&s->pb);
3391     s->header_bits= bits - s->last_bits;
3392
         /* hand the post-ME state to the other contexts, encode all slices,
          * then merge statistics and bitstreams back into s */
3393     for(i=1; i<context_count; i++){
3394         update_duplicate_context_after_me(s->thread_context[i], s);
3395     }
3396     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3397     for(i=1; i<context_count; i++){
3398         merge_context_after_encode(s, s->thread_context[i]);
3399     }
3400     emms_c();
3401     return 0;
3402 }
3403
3404 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3405     const int intra= s->mb_intra;
3406     int i;
3407
3408     s->dct_count[intra]++;
3409
3410     for(i=0; i<64; i++){
3411         int level= block[i];
3412
3413         if(level){
3414             if(level>0){
3415                 s->dct_error_sum[intra][i] += level;
3416                 level -= s->dct_offset[intra][i];
3417                 if(level<0) level=0;
3418             }else{
3419                 s->dct_error_sum[intra][i] -= level;
3420                 level += s->dct_offset[intra][i];
3421                 if(level>0) level=0;
3422             }
3423             block[i]= level;
3424         }
3425     }
3426 }
3427
/**
 * Rate-distortion optimised (trellis) quantisation of one block.
 *
 * The forward DCT is applied to block in place (followed by the denoiser if
 * s->dct_error_sum is set).  Every coefficient up to the last one that
 * quantises to nonzero gets up to two candidate levels; a search over
 * (run, level) sequences then picks the one with the lowest
 * distortion + lambda * bits, using the VLC length tables of the context.
 *
 * @param s        encoder context
 * @param block    in: samples to transform, out: quantised coefficients,
 *                 stored at the positions given by intra_scantable.permutated
 * @param n        block number; n < 4 selects the luma DC scale and the luma
 *                 intra matrix, otherwise the chroma ones are used
 * @param qscale   quantiser scale
 * @param overflow set to 1 if a quantised level may exceed s->max_qcoeff,
 *                 0 otherwise
 * @return scan index of the last nonzero coefficient; a value below the
 *         first coded index (0 for intra, -1 for inter) means no coefficient
 *         was coded
 */
3428 static int dct_quantize_trellis_c(MpegEncContext *s,
3429                                   DCTELEM *block, int n,
3430                                   int qscale, int *overflow){
3431     const int *qmat;
3432     const uint8_t *scantable= s->intra_scantable.scantable;
3433     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3434     int max=0;
3435     unsigned int threshold1, threshold2;
3436     int bias=0;
         /* run_tab/level_tab/score_tab are indexed by "position after the
          * coefficient", i.e. entry i+1 describes the best way to end at i */
3437     int run_tab[65];
3438     int level_tab[65];
3439     int score_tab[65];
3440     int survivor[65];
3441     int survivor_count;
3442     int last_run=0;
3443     int last_level=0;
3444     int last_score= 0;
3445     int last_i;
         /* coeff[k][i]: k-th candidate level for scan position i,
          * coeff_count[i]: number of candidates (1 or 2) */
3446     int coeff[2][64];
3447     int coeff_count[64];
3448     int qmul, qadd, start_i, last_non_zero, i, dc;
3449     const int esc_length= s->ac_esc_length;
3450     uint8_t * length;
3451     uint8_t * last_length;
3452     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3453
3454     s->dsp.fdct (block);
3455
3456     if(s->dct_error_sum)
3457         s->denoise_dct(s, block);
         /* constants of the H.263 style reconstruction alevel*qmul + qadd used below */
3458     qmul= qscale*16;
3459     qadd= ((qscale-1)|1)*8;
3460
3461     if (s->mb_intra) {
3462         int q;
3463         if (!s->h263_aic) {
3464             if (n < 4)
3465                 q = s->y_dc_scale;
3466             else
3467                 q = s->c_dc_scale;
3468             q = q << 3;
3469         } else{
3470             /* For AIC we skip quant/dequant of INTRADC */
3471             q = 1 << 3;
3472             qadd=0;
3473         }
3474
3475         /* note: block[0] is assumed to be positive */
             /* the DC coefficient is quantised separately, with rounding;
              * the search below starts at index 1 */
3476         block[0] = (block[0] + (q >> 1)) / q;
3477         start_i = 1;
3478         last_non_zero = 0;
3479         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3480         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3481             bias= 1<<(QMAT_SHIFT-1);
3482         length     = s->intra_ac_vlc_length;
3483         last_length= s->intra_ac_vlc_last_length;
3484     } else {
3485         start_i = 0;
3486         last_non_zero = -1;
3487         qmat = s->q_inter_matrix[qscale];
3488         length     = s->inter_ac_vlc_length;
3489         last_length= s->inter_ac_vlc_last_length;
3490     }
3491     last_i= start_i;
3492
         /* (unsigned)(level+threshold1) > threshold2 is a single compare
          * that is true exactly when level is outside [-threshold1, threshold1] */
3493     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3494     threshold2= (threshold1<<1);
3495
         /* scan backwards for the last coefficient above the threshold */
3496     for(i=63; i>=start_i; i--) {
3497         const int j = scantable[i];
3498         int level = block[j] * qmat[j];
3499
3500         if(((unsigned)(level+threshold1))>threshold2){
3501             last_non_zero = i;
3502             break;
3503         }
3504     }
3505
         /* build the candidate levels for every position up to last_non_zero */
3506     for(i=start_i; i<=last_non_zero; i++) {
3507         const int j = scantable[i];
3508         int level = block[j] * qmat[j];
3509
3510 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3511 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3512         if(((unsigned)(level+threshold1))>threshold2){
                 /* candidates: the quantised level and the level whose
                  * magnitude is one smaller (dropped later if it is zero) */
3513             if(level>0){
3514                 level= (bias + level)>>QMAT_SHIFT;
3515                 coeff[0][i]= level;
3516                 coeff[1][i]= level-1;
3517 //                coeff[2][k]= level-2;
3518             }else{
3519                 level= (bias - level)>>QMAT_SHIFT;
3520                 coeff[0][i]= -level;
3521                 coeff[1][i]= -level+1;
3522 //                coeff[2][k]= -level+2;
3523             }
3524             coeff_count[i]= FFMIN(level, 2);
3525             assert(coeff_count[i]);
3526             max |=level;
3527         }else{
                 /* below the threshold: the only candidate is +1 or -1,
                  * carrying the sign of the scaled coefficient */
3528             coeff[0][i]= (level>>31)|1;
3529             coeff_count[i]= 1;
3530         }
3531     }
3532
3533     *overflow= s->max_qcoeff < max; //overflow might have happened
3534
         /* nothing above the threshold: clear the AC part and return */
3535     if(last_non_zero < start_i){
3536         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3537         return last_non_zero;
3538     }
3539
3540     score_tab[start_i]= 0;
3541     survivor[0]= start_i;
3542     survivor_count= 1;
3543
         /* main search: for each position try every candidate level combined
          * with every surviving start position of the preceding zero run */
3544     for(i=start_i; i<=last_non_zero; i++){
3545         int level_index, j, zero_distortion;
3546         int dct_coeff= FFABS(block[ scantable[i] ]);
3547         int best_score=256*256*256*120;
3548
             /* ff_fdct_ifast output is rescaled with ff_inv_aanscales first */
3549         if (s->dsp.fdct == ff_fdct_ifast)
3550             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3551         zero_distortion= dct_coeff*dct_coeff;
3552
3553         for(level_index=0; level_index < coeff_count[i]; level_index++){
3554             int distortion;
3555             int level= coeff[level_index][i];
3556             const int alevel= FFABS(level);
3557             int unquant_coeff;
3558
3559             assert(level);
3560
                 /* reconstruct the magnitude the decoder would see */
3561             if(s->out_format == FMT_H263){
3562                 unquant_coeff= alevel*qmul + qadd;
3563             }else{ //MPEG1
3564                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3565                 if(s->mb_intra){
3566                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3567                         unquant_coeff =   (unquant_coeff - 1) | 1;
3568                 }else{
3569                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3570                         unquant_coeff =   (unquant_coeff - 1) | 1;
3571                 }
3572                 unquant_coeff<<= 3;
3573             }
3574
                 /* distortion relative to coding this coefficient as zero */
3575             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* offset by 64 so that levels -64..63 map to 0..127; anything
                  * outside that range is costed with the escape length */
3576             level+=64;
3577             if((level&(~127)) == 0){
3578                 for(j=survivor_count-1; j>=0; j--){
3579                     int run= i - survivor[j];
3580                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3581                     score += score_tab[i-run];
3582
3583                     if(score < best_score){
3584                         best_score= score;
3585                         run_tab[i+1]= run;
3586                         level_tab[i+1]= level-64;
3587                     }
3588                 }
3589
                     /* H.263 formats have a separate length table for the last
                      * coefficient, so the best end point is tracked here */
3590                 if(s->out_format == FMT_H263){
3591                     for(j=survivor_count-1; j>=0; j--){
3592                         int run= i - survivor[j];
3593                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3594                         score += score_tab[i-run];
3595                         if(score < last_score){
3596                             last_score= score;
3597                             last_run= run;
3598                             last_level= level-64;
3599                             last_i= i+1;
3600                         }
3601                     }
3602                 }
3603             }else{
3604                 distortion += esc_length*lambda;
3605                 for(j=survivor_count-1; j>=0; j--){
3606                     int run= i - survivor[j];
3607                     int score= distortion + score_tab[i-run];
3608
3609                     if(score < best_score){
3610                         best_score= score;
3611                         run_tab[i+1]= run;
3612                         level_tab[i+1]= level-64;
3613                     }
3614                 }
3615
3616                 if(s->out_format == FMT_H263){
3617                   for(j=survivor_count-1; j>=0; j--){
3618                         int run= i - survivor[j];
3619                         int score= distortion + score_tab[i-run];
3620                         if(score < last_score){
3621                             last_score= score;
3622                             last_run= run;
3623                             last_level= level-64;
3624                             last_i= i+1;
3625                         }
3626                     }
3627                 }
3628             }
3629         }
3630
3631         score_tab[i+1]= best_score;
3632
             /* drop survivors from the end of the list whose score is worse
              * than the new best (plus a lambda margin for longer blocks),
              * then append the current position */
3633         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3634         if(last_non_zero <= 27){
3635             for(; survivor_count; survivor_count--){
3636                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3637                     break;
3638             }
3639         }else{
3640             for(; survivor_count; survivor_count--){
3641                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3642                     break;
3643             }
3644         }
3645
3646         survivor[ survivor_count++ ]= i+1;
3647     }
3648
         /* other formats: choose the end position afterwards from score_tab,
          * adding a fixed 2*lambda to every position except 0 */
3649     if(s->out_format != FMT_H263){
3650         last_score= 256*256*256*120;
3651         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3652             int score= score_tab[i];
3653             if(i) score += lambda*2; //FIXME exacter?
3654
3655             if(score < last_score){
3656                 last_score= score;
3657                 last_i= i;
3658                 last_level= level_tab[i];
3659                 last_run= run_tab[i];
3660             }
3661         }
3662     }
3663
3664     s->coded_score[n] = last_score;
3665
         /* keep |block[0]| for the special case below, then clear the block
          * from start_i on before the chosen levels are written back */
3666     dc= FFABS(block[0]);
3667     last_non_zero= last_i - 1;
3668     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3669
3670     if(last_non_zero < start_i)
3671         return last_non_zero;
3672
         /* special case: the only coded coefficient is the one at index 0 of
          * a non-intra block; decide again between its candidates and zero,
          * returning -1 if zero turns out to be cheapest */
3673     if(last_non_zero == 0 && start_i == 0){
3674         int best_level= 0;
3675         int best_score= dc * dc;
3676
3677         for(i=0; i<coeff_count[0]; i++){
3678             int level= coeff[i][0];
3679             int alevel= FFABS(level);
3680             int unquant_coeff, score, distortion;
3681
3682             if(s->out_format == FMT_H263){
3683                     unquant_coeff= (alevel*qmul + qadd)>>3;
3684             }else{ //MPEG1
3685                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3686                     unquant_coeff =   (unquant_coeff - 1) | 1;
3687             }
3688             unquant_coeff = (unquant_coeff + 4) >> 3;
3689             unquant_coeff<<= 3 + 3;
3690
3691             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3692             level+=64;
3693             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3694             else                    score= distortion + esc_length*lambda;
3695
3696             if(score < best_score){
3697                 best_score= score;
3698                 best_level= level - 64;
3699             }
3700         }
3701         block[0]= best_level;
3702         s->coded_score[n] = best_score - dc*dc;
3703         if(best_level == 0) return -1;
3704         else                return last_non_zero;
3705     }
3706
         /* write the chosen levels back, walking from the last coefficient
          * towards the start using the stored run lengths */
3707     i= last_i;
3708     assert(last_level);
3709
3710     block[ perm_scantable[last_non_zero] ]= last_level;
3711     i -= last_run + 1;
3712
3713     for(; i>start_i; i -= run_tab[i] + 1){
3714         block[ perm_scantable[i-1] ]= level_tab[i];
3715     }
3716
3717     return last_non_zero;
3718 }
3719
3720 //#define REFINE_STATS 1
3721 static int16_t basis[64][64];
3722
3723 static void build_basis(uint8_t *perm){
3724     int i, j, x, y;
3725     emms_c();
3726     for(i=0; i<8; i++){
3727         for(j=0; j<8; j++){
3728             for(y=0; y<8; y++){
3729                 for(x=0; x<8; x++){
3730                     double s= 0.25*(1<<BASIS_SHIFT);
3731                     int index= 8*i + j;
3732                     int perm_index= perm[index];
3733                     if(i==0) s*= sqrt(0.5);
3734                     if(j==0) s*= sqrt(0.5);
3735                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3736                 }
3737             }
3738         }
3739     }
3740 }
3741
3742 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3743                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3744                         int n, int qscale){
3745     int16_t rem[64];
3746     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3747     const uint8_t *scantable= s->intra_scantable.scantable;
3748     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3749 //    unsigned int threshold1, threshold2;
3750 //    int bias=0;
3751     int run_tab[65];
3752     int prev_run=0;
3753     int prev_level=0;
3754     int qmul, qadd, start_i, last_non_zero, i, dc;
3755     uint8_t * length;
3756     uint8_t * last_length;
3757     int lambda;
3758     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3759 #ifdef REFINE_STATS
3760 static int count=0;
3761 static int after_last=0;
3762 static int to_zero=0;
3763 static int from_zero=0;
3764 static int raise=0;
3765 static int lower=0;
3766 static int messed_sign=0;
3767 #endif
3768
3769     if(basis[0][0] == 0)
3770         build_basis(s->dsp.idct_permutation);
3771
3772     qmul= qscale*2;
3773     qadd= (qscale-1)|1;
3774     if (s->mb_intra) {
3775         if (!s->h263_aic) {
3776             if (n < 4)
3777                 q = s->y_dc_scale;
3778             else
3779                 q = s->c_dc_scale;
3780         } else{
3781             /* For AIC we skip quant/dequant of INTRADC */
3782             q = 1;
3783             qadd=0;
3784         }
3785         q <<= RECON_SHIFT-3;
3786         /* note: block[0] is assumed to be positive */
3787         dc= block[0]*q;
3788 //        block[0] = (block[0] + (q >> 1)) / q;
3789         start_i = 1;
3790 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3791 //            bias= 1<<(QMAT_SHIFT-1);
3792         length     = s->intra_ac_vlc_length;
3793         last_length= s->intra_ac_vlc_last_length;
3794     } else {
3795         dc= 0;
3796         start_i = 0;
3797         length     = s->inter_ac_vlc_length;
3798         last_length= s->inter_ac_vlc_last_length;
3799     }
3800     last_non_zero = s->block_last_index[n];
3801
3802 #ifdef REFINE_STATS
3803 {START_TIMER
3804 #endif
3805     dc += (1<<(RECON_SHIFT-1));
3806     for(i=0; i<64; i++){
3807         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3808     }
3809 #ifdef REFINE_STATS
3810 STOP_TIMER("memset rem[]")}
3811 #endif
3812     sum=0;
3813     for(i=0; i<64; i++){
3814         int one= 36;
3815         int qns=4;
3816         int w;
3817
3818         w= FFABS(weight[i]) + qns*one;
3819         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3820
3821         weight[i] = w;
3822 //        w=weight[i] = (63*qns + (w/2)) / w;
3823
3824         assert(w>0);
3825         assert(w<(1<<6));
3826         sum += w*w;
3827     }
3828     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3829 #ifdef REFINE_STATS
3830 {START_TIMER
3831 #endif
3832     run=0;
3833     rle_index=0;
3834     for(i=start_i; i<=last_non_zero; i++){
3835         int j= perm_scantable[i];
3836         const int level= block[j];
3837         int coeff;
3838
3839         if(level){
3840             if(level<0) coeff= qmul*level - qadd;
3841             else        coeff= qmul*level + qadd;
3842             run_tab[rle_index++]=run;
3843             run=0;
3844
3845             s->dsp.add_8x8basis(rem, basis[j], coeff);
3846         }else{
3847             run++;
3848         }
3849     }
3850 #ifdef REFINE_STATS
3851 if(last_non_zero>0){
3852 STOP_TIMER("init rem[]")
3853 }
3854 }
3855
3856 {START_TIMER
3857 #endif
3858     for(;;){
3859         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3860         int best_coeff=0;
3861         int best_change=0;
3862         int run2, best_unquant_change=0, analyze_gradient;
3863 #ifdef REFINE_STATS
3864 {START_TIMER
3865 #endif
3866         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3867
3868         if(analyze_gradient){
3869 #ifdef REFINE_STATS
3870 {START_TIMER
3871 #endif
3872             for(i=0; i<64; i++){
3873                 int w= weight[i];
3874
3875                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3876             }
3877 #ifdef REFINE_STATS
3878 STOP_TIMER("rem*w*w")}
3879 {START_TIMER
3880 #endif
3881             s->dsp.fdct(d1);
3882 #ifdef REFINE_STATS
3883 STOP_TIMER("dct")}
3884 #endif
3885         }
3886
3887         if(start_i){
3888             const int level= block[0];
3889             int change, old_coeff;
3890
3891             assert(s->mb_intra);
3892
3893             old_coeff= q*level;
3894
3895             for(change=-1; change<=1; change+=2){
3896                 int new_level= level + change;
3897                 int score, new_coeff;
3898
3899                 new_coeff= q*new_level;
3900                 if(new_coeff >= 2048 || new_coeff < 0)
3901                     continue;
3902
3903                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3904                 if(score<best_score){
3905                     best_score= score;
3906                     best_coeff= 0;
3907                     best_change= change;
3908                     best_unquant_change= new_coeff - old_coeff;
3909                 }
3910             }
3911         }
3912
3913         run=0;
3914         rle_index=0;
3915         run2= run_tab[rle_index++];
3916         prev_level=0;
3917         prev_run=0;
3918
3919         for(i=start_i; i<64; i++){
3920             int j= perm_scantable[i];
3921             const int level= block[j];
3922             int change, old_coeff;
3923
3924             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3925                 break;
3926
3927             if(level){
3928                 if(level<0) old_coeff= qmul*level - qadd;
3929                 else        old_coeff= qmul*level + qadd;
3930                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3931             }else{
3932                 old_coeff=0;
3933                 run2--;
3934                 assert(run2>=0 || i >= last_non_zero );
3935             }
3936
3937             for(change=-1; change<=1; change+=2){
3938                 int new_level= level + change;
3939                 int score, new_coeff, unquant_change;
3940
3941                 score=0;
3942                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3943                    continue;
3944
3945                 if(new_level){
3946                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3947                     else            new_coeff= qmul*new_level + qadd;
3948                     if(new_coeff >= 2048 || new_coeff <= -2048)
3949                         continue;
3950                     //FIXME check for overflow
3951
3952                     if(level){
3953                         if(level < 63 && level > -63){
3954                             if(i < last_non_zero)
3955                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3956                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3957                             else
3958                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3959                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3960                         }
3961                     }else{
3962                         assert(FFABS(new_level)==1);
3963
3964                         if(analyze_gradient){
3965                             int g= d1[ scantable[i] ];
3966                             if(g && (g^new_level) >= 0)
3967                                 continue;
3968                         }
3969
3970                         if(i < last_non_zero){
3971                             int next_i= i + run2 + 1;
3972                             int next_level= block[ perm_scantable[next_i] ] + 64;
3973
3974                             if(next_level&(~127))
3975                                 next_level= 0;
3976
3977                             if(next_i < last_non_zero)
3978                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3979                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3980                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3981                             else
3982                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3983                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3984                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3985                         }else{
3986                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3987                             if(prev_level){
3988                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3989                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3990                             }
3991                         }
3992                     }
3993                 }else{
3994                     new_coeff=0;
3995                     assert(FFABS(level)==1);
3996
3997                     if(i < last_non_zero){
3998                         int next_i= i + run2 + 1;
3999                         int next_level= block[ perm_scantable[next_i] ] + 64;
4000
4001                         if(next_level&(~127))
4002                             next_level= 0;
4003
4004                         if(next_i < last_non_zero)
4005                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4006                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4007                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4008                         else
4009                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4010                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4011                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4012                     }else{
4013                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4014                         if(prev_level){
4015                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4016                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4017                         }
4018                     }
4019                 }
4020
4021                 score *= lambda;
4022
4023                 unquant_change= new_coeff - old_coeff;
4024                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4025
4026                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4027                 if(score<best_score){
4028                     best_score= score;
4029                     best_coeff= i;
4030                     best_change= change;
4031                     best_unquant_change= unquant_change;
4032                 }
4033             }
4034             if(level){
4035                 prev_level= level + 64;
4036                 if(prev_level&(~127))
4037                     prev_level= 0;
4038                 prev_run= run;
4039                 run=0;
4040             }else{
4041                 run++;
4042             }
4043         }
4044 #ifdef REFINE_STATS
4045 STOP_TIMER("iterative step")}
4046 #endif
4047
4048         if(best_change){
4049             int j= perm_scantable[ best_coeff ];
4050
4051             block[j] += best_change;
4052
4053             if(best_coeff > last_non_zero){
4054                 last_non_zero= best_coeff;
4055                 assert(block[j]);
4056 #ifdef REFINE_STATS
4057 after_last++;
4058 #endif
4059             }else{
4060 #ifdef REFINE_STATS
4061 if(block[j]){
4062     if(block[j] - best_change){
4063         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4064             raise++;
4065         }else{
4066             lower++;
4067         }
4068     }else{
4069         from_zero++;
4070     }
4071 }else{
4072     to_zero++;
4073 }
4074 #endif
4075                 for(; last_non_zero>=start_i; last_non_zero--){
4076                     if(block[perm_scantable[last_non_zero]])
4077                         break;
4078                 }
4079             }
4080 #ifdef REFINE_STATS
4081 count++;
4082 if(256*256*256*64 % count == 0){
4083     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4084 }
4085 #endif
4086             run=0;
4087             rle_index=0;
4088             for(i=start_i; i<=last_non_zero; i++){
4089                 int j= perm_scantable[i];
4090                 const int level= block[j];
4091
4092                  if(level){
4093                      run_tab[rle_index++]=run;
4094                      run=0;
4095                  }else{
4096                      run++;
4097                  }
4098             }
4099
4100             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4101         }else{
4102             break;
4103         }
4104     }
4105 #ifdef REFINE_STATS
4106 if(last_non_zero>0){
4107 STOP_TIMER("iterative search")
4108 }
4109 }
4110 #endif
4111
4112     return last_non_zero;
4113 }
4114
4115 int ff_dct_quantize_c(MpegEncContext *s,
4116                         DCTELEM *block, int n,
4117                         int qscale, int *overflow)
4118 {
4119     int i, j, level, last_non_zero, q, start_i;
4120     const int *qmat;
4121     const uint8_t *scantable= s->intra_scantable.scantable;
4122     int bias;
4123     int max=0;
4124     unsigned int threshold1, threshold2;
4125
4126     s->dsp.fdct (block);
4127
4128     if(s->dct_error_sum)
4129         s->denoise_dct(s, block);
4130
4131     if (s->mb_intra) {
4132         if (!s->h263_aic) {
4133             if (n < 4)
4134                 q = s->y_dc_scale;
4135             else
4136                 q = s->c_dc_scale;
4137             q = q << 3;
4138         } else
4139             /* For AIC we skip quant/dequant of INTRADC */
4140             q = 1 << 3;
4141
4142         /* note: block[0] is assumed to be positive */
4143         block[0] = (block[0] + (q >> 1)) / q;
4144         start_i = 1;
4145         last_non_zero = 0;
4146         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4147         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4148     } else {
4149         start_i = 0;
4150         last_non_zero = -1;
4151         qmat = s->q_inter_matrix[qscale];
4152         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4153     }
4154     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4155     threshold2= (threshold1<<1);
4156     for(i=63;i>=start_i;i--) {
4157         j = scantable[i];
4158         level = block[j] * qmat[j];
4159
4160         if(((unsigned)(level+threshold1))>threshold2){
4161             last_non_zero = i;
4162             break;
4163         }else{
4164             block[j]=0;
4165         }
4166     }
4167     for(i=start_i; i<=last_non_zero; i++) {
4168         j = scantable[i];
4169         level = block[j] * qmat[j];
4170
4171 //        if(   bias+level >= (1<<QMAT_SHIFT)
4172 //           || bias-level >= (1<<QMAT_SHIFT)){
4173         if(((unsigned)(level+threshold1))>threshold2){
4174             if(level>0){
4175                 level= (bias + level)>>QMAT_SHIFT;
4176                 block[j]= level;
4177             }else{
4178                 level= (bias - level)>>QMAT_SHIFT;
4179                 block[j]= -level;
4180             }
4181             max |=level;
4182         }else{
4183             block[j]=0;
4184         }
4185     }
4186     *overflow= s->max_qcoeff < max; //overflow might have happened
4187
4188     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4189     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4190         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4191
4192     return last_non_zero;
4193 }
4194
/* Byte offset of field x inside MpegEncContext, for AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flags shared by the video encoder options below.  Parenthesised so the
 * macro stays a single operand wherever it is expanded. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4197 static const AVOption h263_options[] = {
4198     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4199     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4200     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { 0 }, 0, INT_MAX, VE },
4201     FF_MPV_COMMON_OPTS
4202     { NULL },
4203 };
4204
4205 static const AVClass h263_class = {
4206     .class_name = "H.263 encoder",
4207     .item_name  = av_default_item_name,
4208     .option     = h263_options,
4209     .version    = LIBAVUTIL_VERSION_INT,
4210 };
4211
4212 AVCodec ff_h263_encoder = {
4213     .name           = "h263",
4214     .type           = AVMEDIA_TYPE_VIDEO,
4215     .id             = CODEC_ID_H263,
4216     .priv_data_size = sizeof(MpegEncContext),
4217     .init           = ff_MPV_encode_init,
4218     .encode2        = ff_MPV_encode_picture,
4219     .close          = ff_MPV_encode_end,
4220     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4221     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4222     .priv_class     = &h263_class,
4223 };
4224
4225 static const AVOption h263p_options[] = {
4226     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4227     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4228     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4229     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4230     FF_MPV_COMMON_OPTS
4231     { NULL },
4232 };
4233 static const AVClass h263p_class = {
4234     .class_name = "H.263p encoder",
4235     .item_name  = av_default_item_name,
4236     .option     = h263p_options,
4237     .version    = LIBAVUTIL_VERSION_INT,
4238 };
4239
4240 AVCodec ff_h263p_encoder = {
4241     .name           = "h263p",
4242     .type           = AVMEDIA_TYPE_VIDEO,
4243     .id             = CODEC_ID_H263P,
4244     .priv_data_size = sizeof(MpegEncContext),
4245     .init           = ff_MPV_encode_init,
4246     .encode2        = ff_MPV_encode_picture,
4247     .close          = ff_MPV_encode_end,
4248     .capabilities   = CODEC_CAP_SLICE_THREADS,
4249     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4250     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4251     .priv_class     = &h263p_class,
4252 };
4253
4254 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4255
4256 AVCodec ff_msmpeg4v2_encoder = {
4257     .name           = "msmpeg4v2",
4258     .type           = AVMEDIA_TYPE_VIDEO,
4259     .id             = CODEC_ID_MSMPEG4V2,
4260     .priv_data_size = sizeof(MpegEncContext),
4261     .init           = ff_MPV_encode_init,
4262     .encode2        = ff_MPV_encode_picture,
4263     .close          = ff_MPV_encode_end,
4264     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4265     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4266     .priv_class     = &msmpeg4v2_class,
4267 };
4268
4269 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4270
4271 AVCodec ff_msmpeg4v3_encoder = {
4272     .name           = "msmpeg4",
4273     .type           = AVMEDIA_TYPE_VIDEO,
4274     .id             = CODEC_ID_MSMPEG4V3,
4275     .priv_data_size = sizeof(MpegEncContext),
4276     .init           = ff_MPV_encode_init,
4277     .encode2        = ff_MPV_encode_picture,
4278     .close          = ff_MPV_encode_end,
4279     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4280     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4281     .priv_class     = &msmpeg4v3_class,
4282 };
4283
4284 FF_MPV_GENERIC_CLASS(wmv1)
4285
4286 AVCodec ff_wmv1_encoder = {
4287     .name           = "wmv1",
4288     .type           = AVMEDIA_TYPE_VIDEO,
4289     .id             = CODEC_ID_WMV1,
4290     .priv_data_size = sizeof(MpegEncContext),
4291     .init           = ff_MPV_encode_init,
4292     .encode2        = ff_MPV_encode_picture,
4293     .close          = ff_MPV_encode_end,
4294     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4295     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4296     .priv_class     = &wmv1_class,
4297 };