]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge remote-tracking branch 'qatar/master'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "h263.h"
37 #include "mjpegenc.h"
38 #include "msmpeg4.h"
39 #include "faandct.h"
40 #include "thread.h"
41 #include "aandcttab.h"
42 #include "flv.h"
43 #include "mpeg4video.h"
44 #include "internal.h"
45 #include "bytestream.h"
46 #include <limits.h>
47 #include "sp5x.h"
48
49 //#undef NDEBUG
50 //#include <assert.h>
51
/* Forward declarations of file-local (static) helpers so that they can be
 * referenced before their definitions; for example ff_MPV_encode_init()
 * installs denoise_dct_c and dct_quantize_trellis_c as function pointers. */
52 static int encode_picture(MpegEncContext *s, int picture_number);
53 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
54 static int sse_mb(MpegEncContext *s);
55 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
56 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
57
58 /* enable all paranoid tests for rounding, overflows, etc... */
59 //#define PARANOID
60
61 //#define DEBUG
62
/* Table that MPV_encode_defaults() installs as s->me.mv_penalty. */
63 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
/* Table that MPV_encode_defaults() installs as s->fcode_tab; that function
 * sets the 32 entries at indices MAX_MV - 16 .. MAX_MV + 15 to 1. */
64 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
65
/* Option table made of FF_MPV_COMMON_OPTS followed by a { NULL } entry as
 * its last element. */
66 const AVOption ff_mpv_generic_options[] = {
67     FF_MPV_COMMON_OPTS
68     { NULL },
69 };
70
71 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
72                        uint16_t (*qmat16)[2][64],
73                        const uint16_t *quant_matrix,
74                        int bias, int qmin, int qmax, int intra)
75 {
76     int qscale;
77     int shift = 0;
78
79     for (qscale = qmin; qscale <= qmax; qscale++) {
80         int i;
81         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
82             dsp->fdct == ff_jpeg_fdct_islow_10 ||
83             dsp->fdct == ff_faandct) {
84             for (i = 0; i < 64; i++) {
85                 const int j = dsp->idct_permutation[i];
86                 /* 16 <= qscale * quant_matrix[i] <= 7905
87                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
88                  *             19952 <=              x  <= 249205026
89                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
90                  *           3444240 >= (1 << 36) / (x) >= 275 */
91
92                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
93                                         (qscale * quant_matrix[j]));
94             }
95         } else if (dsp->fdct == ff_fdct_ifast) {
96             for (i = 0; i < 64; i++) {
97                 const int j = dsp->idct_permutation[i];
98                 /* 16 <= qscale * quant_matrix[i] <= 7905
99                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
100                  *             19952 <=              x  <= 249205026
101                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
102                  *           3444240 >= (1 << 36) / (x) >= 275 */
103
104                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
105                                         (ff_aanscales[i] * qscale * quant_matrix[j]));
106             }
107         } else {
108             for (i = 0; i < 64; i++) {
109                 const int j = dsp->idct_permutation[i];
110                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
111                  * Assume x = qscale * quant_matrix[i]
112                  * So             16 <=              x  <= 7905
113                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
114                  * so          32768 >= (1 << 19) / (x) >= 67 */
115                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
116                                         (qscale * quant_matrix[j]));
117                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
118                 //                    (qscale * quant_matrix[i]);
119                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
120                                        (qscale * quant_matrix[j]);
121
122                 if (qmat16[qscale][0][i] == 0 ||
123                     qmat16[qscale][0][i] == 128 * 256)
124                     qmat16[qscale][0][i] = 128 * 256 - 1;
125                 qmat16[qscale][1][i] =
126                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
127                                 qmat16[qscale][0][i]);
128             }
129         }
130
131         for (i = intra; i < 64; i++) {
132             int64_t max = 8191;
133             if (dsp->fdct == ff_fdct_ifast) {
134                 max = (8191LL * ff_aanscales[i]) >> 14;
135             }
136             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
137                 shift++;
138             }
139         }
140     }
141     if (shift) {
142         av_log(NULL, AV_LOG_INFO,
143                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
144                QMAT_SHIFT - shift);
145     }
146 }
147
148 static inline void update_qscale(MpegEncContext *s)
149 {
150     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
151                 (FF_LAMBDA_SHIFT + 7);
152     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
153
154     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
155                  FF_LAMBDA_SHIFT;
156 }
157
158 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
159 {
160     int i;
161
162     if (matrix) {
163         put_bits(pb, 1, 1);
164         for (i = 0; i < 64; i++) {
165             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
166         }
167     } else
168         put_bits(pb, 1, 0);
169 }
170
171 /**
172  * init s->current_picture.qscale_table from s->lambda_table
173  */
174 void ff_init_qscale_tab(MpegEncContext *s)
175 {
176     int8_t * const qscale_table = s->current_picture.f.qscale_table;
177     int i;
178
179     for (i = 0; i < s->mb_num; i++) {
180         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
181         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
182         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
183                                                   s->avctx->qmax);
184     }
185 }
186
187 static void copy_picture_attributes(MpegEncContext *s,
188                                     AVFrame *dst,
189                                     AVFrame *src)
190 {
191     int i;
192
193     dst->pict_type              = src->pict_type;
194     dst->quality                = src->quality;
195     dst->coded_picture_number   = src->coded_picture_number;
196     dst->display_picture_number = src->display_picture_number;
197     //dst->reference              = src->reference;
198     dst->pts                    = src->pts;
199     dst->interlaced_frame       = src->interlaced_frame;
200     dst->top_field_first        = src->top_field_first;
201
202     if (s->avctx->me_threshold) {
203         if (!src->motion_val[0])
204             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
205         if (!src->mb_type)
206             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
207         if (!src->ref_index[0])
208             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
209         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
210             av_log(s->avctx, AV_LOG_ERROR,
211                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
212                    src->motion_subsample_log2, dst->motion_subsample_log2);
213
214         memcpy(dst->mb_type, src->mb_type,
215                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
216
217         for (i = 0; i < 2; i++) {
218             int stride = ((16 * s->mb_width ) >>
219                           src->motion_subsample_log2) + 1;
220             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
221
222             if (src->motion_val[i] &&
223                 src->motion_val[i] != dst->motion_val[i]) {
224                 memcpy(dst->motion_val[i], src->motion_val[i],
225                        2 * stride * height * sizeof(int16_t));
226             }
227             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
228                 memcpy(dst->ref_index[i], src->ref_index[i],
229                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
230             }
231         }
232     }
233 }
234
235 static void update_duplicate_context_after_me(MpegEncContext *dst,
236                                               MpegEncContext *src)
237 {
238 #define COPY(a) dst->a= src->a
239     COPY(pict_type);
240     COPY(current_picture);
241     COPY(f_code);
242     COPY(b_code);
243     COPY(qscale);
244     COPY(lambda);
245     COPY(lambda2);
246     COPY(picture_in_gop_number);
247     COPY(gop_picture_number);
248     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
249     COPY(progressive_frame);    // FIXME don't set in encode_header
250     COPY(partitioned_frame);    // FIXME don't set in encode_header
251 #undef COPY
252 }
253
254 /**
255  * Set the given MpegEncContext to defaults for encoding.
256  * the changed fields will not depend upon the prior state of the MpegEncContext.
257  */
258 static void MPV_encode_defaults(MpegEncContext *s)
259 {
260     int i;
261     ff_MPV_common_defaults(s);
262
263     for (i = -16; i < 16; i++) {
264         default_fcode_tab[i + MAX_MV] = 1;
265     }
266     s->me.mv_penalty = default_mv_penalty;
267     s->fcode_tab     = default_fcode_tab;
268 }
269
270 /* init video encoder */
271 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
272 {
273     MpegEncContext *s = avctx->priv_data;
274     int i;
275     int chroma_h_shift, chroma_v_shift;
276
277     MPV_encode_defaults(s);
278
279     switch (avctx->codec_id) {
280     case AV_CODEC_ID_MPEG2VIDEO:
281         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
282             avctx->pix_fmt != PIX_FMT_YUV422P) {
283             av_log(avctx, AV_LOG_ERROR,
284                    "only YUV420 and YUV422 are supported\n");
285             return -1;
286         }
287         break;
288     case AV_CODEC_ID_LJPEG:
289         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
290             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
291             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
292             avctx->pix_fmt != PIX_FMT_BGR0     &&
293             avctx->pix_fmt != PIX_FMT_BGRA     &&
294             avctx->pix_fmt != PIX_FMT_BGR24    &&
295             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
296               avctx->pix_fmt != PIX_FMT_YUV422P &&
297               avctx->pix_fmt != PIX_FMT_YUV444P) ||
298              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
299             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
300             return -1;
301         }
302         break;
303     case AV_CODEC_ID_MJPEG:
304     case AV_CODEC_ID_AMV:
305         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
306             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
307             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
308               avctx->pix_fmt != PIX_FMT_YUV422P) ||
309              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
310             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
311             return -1;
312         }
313         break;
314     default:
315         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
316             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
317             return -1;
318         }
319     }
320
321     switch (avctx->pix_fmt) {
322     case PIX_FMT_YUVJ422P:
323     case PIX_FMT_YUV422P:
324         s->chroma_format = CHROMA_422;
325         break;
326     case PIX_FMT_YUVJ420P:
327     case PIX_FMT_YUV420P:
328     default:
329         s->chroma_format = CHROMA_420;
330         break;
331     }
332
333     s->bit_rate = avctx->bit_rate;
334     s->width    = avctx->width;
335     s->height   = avctx->height;
336     if (avctx->gop_size > 600 &&
337         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
338         av_log(avctx, AV_LOG_WARNING,
339                "keyframe interval too large!, reducing it from %d to %d\n",
340                avctx->gop_size, 600);
341         avctx->gop_size = 600;
342     }
343     s->gop_size     = avctx->gop_size;
344     s->avctx        = avctx;
345     s->flags        = avctx->flags;
346     s->flags2       = avctx->flags2;
347     s->max_b_frames = avctx->max_b_frames;
348     s->codec_id     = avctx->codec->id;
349 #if FF_API_MPV_GLOBAL_OPTS
350     if (avctx->luma_elim_threshold)
351         s->luma_elim_threshold   = avctx->luma_elim_threshold;
352     if (avctx->chroma_elim_threshold)
353         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
354 #endif
355     s->strict_std_compliance = avctx->strict_std_compliance;
356     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
357     s->mpeg_quant         = avctx->mpeg_quant;
358     s->rtp_mode           = !!avctx->rtp_payload_size;
359     s->intra_dc_precision = avctx->intra_dc_precision;
360     s->user_specified_pts = AV_NOPTS_VALUE;
361
362     if (s->gop_size <= 1) {
363         s->intra_only = 1;
364         s->gop_size   = 12;
365     } else {
366         s->intra_only = 0;
367     }
368
369     s->me_method = avctx->me_method;
370
371     /* Fixed QSCALE */
372     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
373
374 #if FF_API_MPV_GLOBAL_OPTS
375     if (s->flags & CODEC_FLAG_QP_RD)
376         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
377 #endif
378
379     s->adaptive_quant = (s->avctx->lumi_masking ||
380                          s->avctx->dark_masking ||
381                          s->avctx->temporal_cplx_masking ||
382                          s->avctx->spatial_cplx_masking  ||
383                          s->avctx->p_masking      ||
384                          s->avctx->border_masking ||
385                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
386                         !s->fixed_qscale;
387
388     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
389
390     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
391         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
392         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
393             return -1;
394     }
395
396     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
397         av_log(avctx, AV_LOG_INFO,
398                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
399     }
400
401     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
402         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
403         return -1;
404     }
405
406     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
407         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
408         return -1;
409     }
410
411     if (avctx->rc_max_rate &&
412         avctx->rc_max_rate == avctx->bit_rate &&
413         avctx->rc_max_rate != avctx->rc_min_rate) {
414         av_log(avctx, AV_LOG_INFO,
415                "impossible bitrate constraints, this will fail\n");
416     }
417
418     if (avctx->rc_buffer_size &&
419         avctx->bit_rate * (int64_t)avctx->time_base.num >
420             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
421         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
422         return -1;
423     }
424
425     if (!s->fixed_qscale &&
426         avctx->bit_rate * av_q2d(avctx->time_base) >
427             avctx->bit_rate_tolerance) {
428         av_log(avctx, AV_LOG_ERROR,
429                "bitrate tolerance too small for bitrate\n");
430         return -1;
431     }
432
433     if (s->avctx->rc_max_rate &&
434         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
435         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
436          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
437         90000LL * (avctx->rc_buffer_size - 1) >
438             s->avctx->rc_max_rate * 0xFFFFLL) {
439         av_log(avctx, AV_LOG_INFO,
440                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
441                "specified vbv buffer is too large for the given bitrate!\n");
442     }
443
444     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
445         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
446         s->codec_id != AV_CODEC_ID_FLV1) {
447         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
448         return -1;
449     }
450
451     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
452         av_log(avctx, AV_LOG_ERROR,
453                "OBMC is only supported with simple mb decision\n");
454         return -1;
455     }
456
457     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
458         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
459         return -1;
460     }
461
462     if (s->max_b_frames                    &&
463         s->codec_id != AV_CODEC_ID_MPEG4      &&
464         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
465         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
466         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
467         return -1;
468     }
469
470     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
471          s->codec_id == AV_CODEC_ID_H263  ||
472          s->codec_id == AV_CODEC_ID_H263P) &&
473         (avctx->sample_aspect_ratio.num > 255 ||
474          avctx->sample_aspect_ratio.den > 255)) {
475         av_log(avctx, AV_LOG_WARNING,
476                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
477                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
478         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
479                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
480     }
481
482     if ((s->codec_id == AV_CODEC_ID_H263  ||
483          s->codec_id == AV_CODEC_ID_H263P) &&
484         (avctx->width  > 2048 ||
485          avctx->height > 1152 )) {
486         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
487         return -1;
488     }
489     if ((s->codec_id == AV_CODEC_ID_H263  ||
490          s->codec_id == AV_CODEC_ID_H263P) &&
491         ((avctx->width &3) ||
492          (avctx->height&3) )) {
493         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
494         return -1;
495     }
496
497     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
498         (avctx->width  > 4095 ||
499          avctx->height > 4095 )) {
500         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
501         return -1;
502     }
503
504     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
505         (avctx->width  > 16383 ||
506          avctx->height > 16383 )) {
507         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
508         return -1;
509     }
510
511     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
512          s->codec_id == AV_CODEC_ID_WMV2) &&
513          avctx->width & 1) {
514          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
515          return -1;
516     }
517
518     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
519         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
520         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
521         return -1;
522     }
523
524     // FIXME mpeg2 uses that too
525     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
526         av_log(avctx, AV_LOG_ERROR,
527                "mpeg2 style quantization not supported by codec\n");
528         return -1;
529     }
530
531 #if FF_API_MPV_GLOBAL_OPTS
532     if (s->flags & CODEC_FLAG_CBP_RD)
533         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
534 #endif
535
536     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
537         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
538         return -1;
539     }
540
541     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
542         s->avctx->mb_decision != FF_MB_DECISION_RD) {
543         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
544         return -1;
545     }
546
547     if (s->avctx->scenechange_threshold < 1000000000 &&
548         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
549         av_log(avctx, AV_LOG_ERROR,
550                "closed gop with scene change detection are not supported yet, "
551                "set threshold to 1000000000\n");
552         return -1;
553     }
554
555     if (s->flags & CODEC_FLAG_LOW_DELAY) {
556         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
557             av_log(avctx, AV_LOG_ERROR,
558                   "low delay forcing is only available for mpeg2\n");
559             return -1;
560         }
561         if (s->max_b_frames != 0) {
562             av_log(avctx, AV_LOG_ERROR,
563                    "b frames cannot be used with low delay\n");
564             return -1;
565         }
566     }
567
568     if (s->q_scale_type == 1) {
569         if (avctx->qmax > 12) {
570             av_log(avctx, AV_LOG_ERROR,
571                    "non linear quant only supports qmax <= 12 currently\n");
572             return -1;
573         }
574     }
575
576     if (s->avctx->thread_count > 1         &&
577         s->codec_id != AV_CODEC_ID_MPEG4      &&
578         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
579         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
580         s->codec_id != AV_CODEC_ID_MJPEG      &&
581         (s->codec_id != AV_CODEC_ID_H263P)) {
582         av_log(avctx, AV_LOG_ERROR,
583                "multi threaded encoding not supported by codec\n");
584         return -1;
585     }
586
587     if (s->avctx->thread_count < 1) {
588         av_log(avctx, AV_LOG_ERROR,
589                "automatic thread number detection not supported by codec, "
590                "patch welcome\n");
591         return -1;
592     }
593
594     if (s->avctx->thread_count > 1)
595         s->rtp_mode = 1;
596
597     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
598         s->h263_slice_structured = 1;
599
600     if (!avctx->time_base.den || !avctx->time_base.num) {
601         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
602         return -1;
603     }
604
605     i = (INT_MAX / 2 + 128) >> 8;
606     if (avctx->me_threshold >= i) {
607         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
608                i - 1);
609         return -1;
610     }
611     if (avctx->mb_threshold >= i) {
612         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
613                i - 1);
614         return -1;
615     }
616
617     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
618         av_log(avctx, AV_LOG_INFO,
619                "notice: b_frame_strategy only affects the first pass\n");
620         avctx->b_frame_strategy = 0;
621     }
622
623     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
624     if (i > 1) {
625         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
626         avctx->time_base.den /= i;
627         avctx->time_base.num /= i;
628         //return -1;
629     }
630
631     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
632         // (a + x * 3 / 8) / x
633         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
634         s->inter_quant_bias = 0;
635     } else {
636         s->intra_quant_bias = 0;
637         // (a - x / 4) / x
638         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
639     }
640
641     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
642         s->intra_quant_bias = avctx->intra_quant_bias;
643     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
644         s->inter_quant_bias = avctx->inter_quant_bias;
645
646     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
647
648     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
649                                   &chroma_v_shift);
650
651     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
652         s->avctx->time_base.den > (1 << 16) - 1) {
653         av_log(avctx, AV_LOG_ERROR,
654                "timebase %d/%d not supported by MPEG 4 standard, "
655                "the maximum admitted value for the timebase denominator "
656                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
657                (1 << 16) - 1);
658         return -1;
659     }
660     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
661
662 #if FF_API_MPV_GLOBAL_OPTS
663     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
664         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
665     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
666         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
667     if (avctx->quantizer_noise_shaping)
668         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
669 #endif
670
671     switch (avctx->codec->id) {
672     case AV_CODEC_ID_MPEG1VIDEO:
673         s->out_format = FMT_MPEG1;
674         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
675         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
676         break;
677     case AV_CODEC_ID_MPEG2VIDEO:
678         s->out_format = FMT_MPEG1;
679         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
680         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
681         s->rtp_mode   = 1;
682         break;
683     case AV_CODEC_ID_LJPEG:
684     case AV_CODEC_ID_MJPEG:
685     case AV_CODEC_ID_AMV:
686         s->out_format = FMT_MJPEG;
687         s->intra_only = 1; /* force intra only for jpeg */
688         if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
689             (avctx->pix_fmt == PIX_FMT_BGR0
690              || s->avctx->pix_fmt == PIX_FMT_BGRA
691              || s->avctx->pix_fmt == PIX_FMT_BGR24)) {
692             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
693             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
694             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
695         } else {
696             s->mjpeg_vsample[0] = 2;
697             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
698             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
699             s->mjpeg_hsample[0] = 2;
700             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
701             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
702         }
703         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
704             ff_mjpeg_encode_init(s) < 0)
705             return -1;
706         avctx->delay = 0;
707         s->low_delay = 1;
708         break;
709     case AV_CODEC_ID_H261:
710         if (!CONFIG_H261_ENCODER)
711             return -1;
712         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
713             av_log(avctx, AV_LOG_ERROR,
714                    "The specified picture size of %dx%d is not valid for the "
715                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
716                     s->width, s->height);
717             return -1;
718         }
719         s->out_format = FMT_H261;
720         avctx->delay  = 0;
721         s->low_delay  = 1;
722         break;
723     case AV_CODEC_ID_H263:
724         if (!CONFIG_H263_ENCODER)
725             return -1;
726         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
727                              s->width, s->height) == 8) {
728             av_log(avctx, AV_LOG_ERROR,
729                    "The specified picture size of %dx%d is not valid for "
730                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
731                    "352x288, 704x576, and 1408x1152. "
732                    "Try H.263+.\n", s->width, s->height);
733             return -1;
734         }
735         s->out_format = FMT_H263;
736         avctx->delay  = 0;
737         s->low_delay  = 1;
738         break;
739     case AV_CODEC_ID_H263P:
740         s->out_format = FMT_H263;
741         s->h263_plus  = 1;
742         /* Fx */
743         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
744         s->modified_quant  = s->h263_aic;
745         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
746         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
747
748         /* /Fx */
749         /* These are just to be sure */
750         avctx->delay = 0;
751         s->low_delay = 1;
752         break;
753     case AV_CODEC_ID_FLV1:
754         s->out_format      = FMT_H263;
755         s->h263_flv        = 2; /* format = 1; 11-bit codes */
756         s->unrestricted_mv = 1;
757         s->rtp_mode  = 0; /* don't allow GOB */
758         avctx->delay = 0;
759         s->low_delay = 1;
760         break;
761     case AV_CODEC_ID_RV10:
762         s->out_format = FMT_H263;
763         avctx->delay  = 0;
764         s->low_delay  = 1;
765         break;
766     case AV_CODEC_ID_RV20:
767         s->out_format      = FMT_H263;
768         avctx->delay       = 0;
769         s->low_delay       = 1;
770         s->modified_quant  = 1;
771         s->h263_aic        = 1;
772         s->h263_plus       = 1;
773         s->loop_filter     = 1;
774         s->unrestricted_mv = 0;
775         break;
776     case AV_CODEC_ID_MPEG4:
777         s->out_format      = FMT_H263;
778         s->h263_pred       = 1;
779         s->unrestricted_mv = 1;
780         s->low_delay       = s->max_b_frames ? 0 : 1;
781         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
782         break;
783     case AV_CODEC_ID_MSMPEG4V2:
784         s->out_format      = FMT_H263;
785         s->h263_pred       = 1;
786         s->unrestricted_mv = 1;
787         s->msmpeg4_version = 2;
788         avctx->delay       = 0;
789         s->low_delay       = 1;
790         break;
791     case AV_CODEC_ID_MSMPEG4V3:
792         s->out_format        = FMT_H263;
793         s->h263_pred         = 1;
794         s->unrestricted_mv   = 1;
795         s->msmpeg4_version   = 3;
796         s->flipflop_rounding = 1;
797         avctx->delay         = 0;
798         s->low_delay         = 1;
799         break;
800     case AV_CODEC_ID_WMV1:
801         s->out_format        = FMT_H263;
802         s->h263_pred         = 1;
803         s->unrestricted_mv   = 1;
804         s->msmpeg4_version   = 4;
805         s->flipflop_rounding = 1;
806         avctx->delay         = 0;
807         s->low_delay         = 1;
808         break;
809     case AV_CODEC_ID_WMV2:
810         s->out_format        = FMT_H263;
811         s->h263_pred         = 1;
812         s->unrestricted_mv   = 1;
813         s->msmpeg4_version   = 5;
814         s->flipflop_rounding = 1;
815         avctx->delay         = 0;
816         s->low_delay         = 1;
817         break;
818     default:
819         return -1;
820     }
821
822     avctx->has_b_frames = !s->low_delay;
823
824     s->encoding = 1;
825
826     s->progressive_frame    =
827     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
828                                                 CODEC_FLAG_INTERLACED_ME) ||
829                                 s->alternate_scan);
830
831     /* init */
832     if (ff_MPV_common_init(s) < 0)
833         return -1;
834
835     if (!s->dct_quantize)
836         s->dct_quantize = ff_dct_quantize_c;
837     if (!s->denoise_dct)
838         s->denoise_dct  = denoise_dct_c;
839     s->fast_dct_quantize = s->dct_quantize;
840     if (avctx->trellis)
841         s->dct_quantize  = dct_quantize_trellis_c;
842
843     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
844         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
845
846     s->quant_precision = 5;
847
848     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
849     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
850
851     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
852         ff_h261_encode_init(s);
853     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
854         ff_h263_encode_init(s);
855     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
856         ff_msmpeg4_encode_init(s);
857     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
858         && s->out_format == FMT_MPEG1)
859         ff_mpeg1_encode_init(s);
860
861     /* init q matrix */
862     for (i = 0; i < 64; i++) {
863         int j = s->dsp.idct_permutation[i];
864         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
865             s->mpeg_quant) {
866             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
867             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
868         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
869             s->intra_matrix[j] =
870             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
871         } else {
872             /* mpeg1/2 */
873             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
874             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
875         }
876         if (s->avctx->intra_matrix)
877             s->intra_matrix[j] = s->avctx->intra_matrix[i];
878         if (s->avctx->inter_matrix)
879             s->inter_matrix[j] = s->avctx->inter_matrix[i];
880     }
881
882     /* precompute matrix */
883     /* for mjpeg, we do include qscale in the matrix */
884     if (s->out_format != FMT_MJPEG) {
885         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
886                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
887                           31, 1);
888         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
889                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
890                           31, 0);
891     }
892
893     if (ff_rate_control_init(s) < 0)
894         return -1;
895
896     return 0;
897 }
898
899 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
900 {
901     MpegEncContext *s = avctx->priv_data;
902
903     ff_rate_control_uninit(s);
904
905     ff_MPV_common_end(s);
906     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
907         s->out_format == FMT_MJPEG)
908         ff_mjpeg_encode_close(s);
909
910     av_freep(&avctx->extradata);
911
912     return 0;
913 }
914
/**
 * Sum of absolute errors of a 16x16 pixel block against a constant value.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 pixels of |pixel - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
928
929 static int get_intra_count(MpegEncContext *s, uint8_t *src,
930                            uint8_t *ref, int stride)
931 {
932     int x, y, w, h;
933     int acc = 0;
934
935     w = s->width  & ~15;
936     h = s->height & ~15;
937
938     for (y = 0; y < h; y += 16) {
939         for (x = 0; x < w; x += 16) {
940             int offset = x + y * stride;
941             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
942                                      16);
943             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
944             int sae  = get_sae(src + offset, mean, stride);
945
946             acc += sae + 500 < sad;
947         }
948     }
949     return acc;
950 }
951
952
953 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
954 {
955     AVFrame *pic = NULL;
956     int64_t pts;
957     int i;
958     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
959                                                  (s->low_delay ? 0 : 1);
960     int direct = 1;
961
962     if (pic_arg) {
963         pts = pic_arg->pts;
964         pic_arg->display_picture_number = s->input_picture_number++;
965
966         if (pts != AV_NOPTS_VALUE) {
967             if (s->user_specified_pts != AV_NOPTS_VALUE) {
968                 int64_t time = pts;
969                 int64_t last = s->user_specified_pts;
970
971                 if (time <= last) {
972                     av_log(s->avctx, AV_LOG_ERROR,
973                            "Error, Invalid timestamp=%"PRId64", "
974                            "last=%"PRId64"\n", pts, s->user_specified_pts);
975                     return -1;
976                 }
977
978                 if (!s->low_delay && pic_arg->display_picture_number == 1)
979                     s->dts_delta = time - last;
980             }
981             s->user_specified_pts = pts;
982         } else {
983             if (s->user_specified_pts != AV_NOPTS_VALUE) {
984                 s->user_specified_pts =
985                 pts = s->user_specified_pts + 1;
986                 av_log(s->avctx, AV_LOG_INFO,
987                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
988                        pts);
989             } else {
990                 pts = pic_arg->display_picture_number;
991             }
992         }
993     }
994
995   if (pic_arg) {
996     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
997         direct = 0;
998     if (pic_arg->linesize[0] != s->linesize)
999         direct = 0;
1000     if (pic_arg->linesize[1] != s->uvlinesize)
1001         direct = 0;
1002     if (pic_arg->linesize[2] != s->uvlinesize)
1003         direct = 0;
1004
1005     //av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0],
1006     //       pic_arg->linesize[1], s->linesize, s->uvlinesize);
1007
1008     if (direct) {
1009         i = ff_find_unused_picture(s, 1);
1010         if (i < 0)
1011             return i;
1012
1013         pic = &s->picture[i].f;
1014         pic->reference = 3;
1015
1016         for (i = 0; i < 4; i++) {
1017             pic->data[i]     = pic_arg->data[i];
1018             pic->linesize[i] = pic_arg->linesize[i];
1019         }
1020         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
1021             return -1;
1022         }
1023     } else {
1024         i = ff_find_unused_picture(s, 0);
1025         if (i < 0)
1026             return i;
1027
1028         pic = &s->picture[i].f;
1029         pic->reference = 3;
1030
1031         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
1032             return -1;
1033         }
1034
1035         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1036             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1037             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1038             // empty
1039         } else {
1040             int h_chroma_shift, v_chroma_shift;
1041             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
1042                                           &v_chroma_shift);
1043
1044             for (i = 0; i < 3; i++) {
1045                 int src_stride = pic_arg->linesize[i];
1046                 int dst_stride = i ? s->uvlinesize : s->linesize;
1047                 int h_shift = i ? h_chroma_shift : 0;
1048                 int v_shift = i ? v_chroma_shift : 0;
1049                 int w = s->width  >> h_shift;
1050                 int h = s->height >> v_shift;
1051                 uint8_t *src = pic_arg->data[i];
1052                 uint8_t *dst = pic->data[i];
1053
1054                 if(s->codec_id == AV_CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)){
1055                     h= ((s->height+15)/16*16)>>v_shift;
1056                 }
1057
1058                 if (!s->avctx->rc_buffer_size)
1059                     dst += INPLACE_OFFSET;
1060
1061                 if (src_stride == dst_stride)
1062                     memcpy(dst, src, src_stride * h);
1063                 else {
1064                     while (h--) {
1065                         memcpy(dst, src, w);
1066                         dst += dst_stride;
1067                         src += src_stride;
1068                     }
1069                 }
1070             }
1071         }
1072     }
1073     copy_picture_attributes(s, pic, pic_arg);
1074     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1075   }
1076
1077     /* shift buffer entries */
1078     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1079         s->input_picture[i - 1] = s->input_picture[i];
1080
1081     s->input_picture[encoding_delay] = (Picture*) pic;
1082
1083     return 0;
1084 }
1085
1086 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1087 {
1088     int x, y, plane;
1089     int score = 0;
1090     int64_t score64 = 0;
1091
1092     for (plane = 0; plane < 3; plane++) {
1093         const int stride = p->f.linesize[plane];
1094         const int bw = plane ? 1 : 2;
1095         for (y = 0; y < s->mb_height * bw; y++) {
1096             for (x = 0; x < s->mb_width * bw; x++) {
1097                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1098                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1099                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1100                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1101
1102                 switch (s->avctx->frame_skip_exp) {
1103                 case 0: score    =  FFMAX(score, v);          break;
1104                 case 1: score   += FFABS(v);                  break;
1105                 case 2: score   += v * v;                     break;
1106                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1107                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1108                 }
1109             }
1110         }
1111     }
1112
1113     if (score)
1114         score64 = score;
1115
1116     if (score64 < s->avctx->frame_skip_threshold)
1117         return 1;
1118     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1119         return 1;
1120     return 0;
1121 }
1122
1123 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1124 {
1125     AVPacket pkt = { 0 };
1126     int ret, got_output;
1127
1128     av_init_packet(&pkt);
1129     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1130     if (ret < 0)
1131         return ret;
1132
1133     ret = pkt.size;
1134     av_free_packet(&pkt);
1135     return ret;
1136 }
1137
1138 static int estimate_best_b_count(MpegEncContext *s)
1139 {
1140     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1141     AVCodecContext *c = avcodec_alloc_context3(NULL);
1142     AVFrame input[FF_MAX_B_FRAMES + 2];
1143     const int scale = s->avctx->brd_scale;
1144     int i, j, out_size, p_lambda, b_lambda, lambda2;
1145     int64_t best_rd  = INT64_MAX;
1146     int best_b_count = -1;
1147
1148     av_assert0(scale >= 0 && scale <= 3);
1149
1150     //emms_c();
1151     //s->next_picture_ptr->quality;
1152     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1153     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1154     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1155     if (!b_lambda) // FIXME we should do this somewhere else
1156         b_lambda = p_lambda;
1157     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1158                FF_LAMBDA_SHIFT;
1159
1160     c->width        = s->width  >> scale;
1161     c->height       = s->height >> scale;
1162     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1163                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1164     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1165     c->mb_decision  = s->avctx->mb_decision;
1166     c->me_cmp       = s->avctx->me_cmp;
1167     c->mb_cmp       = s->avctx->mb_cmp;
1168     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1169     c->pix_fmt      = PIX_FMT_YUV420P;
1170     c->time_base    = s->avctx->time_base;
1171     c->max_b_frames = s->max_b_frames;
1172
1173     if (avcodec_open2(c, codec, NULL) < 0)
1174         return -1;
1175
1176     for (i = 0; i < s->max_b_frames + 2; i++) {
1177         int ysize = c->width * c->height;
1178         int csize = (c->width / 2) * (c->height / 2);
1179         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1180                                                 s->next_picture_ptr;
1181
1182         avcodec_get_frame_defaults(&input[i]);
1183         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1184         input[i].data[1]     = input[i].data[0] + ysize;
1185         input[i].data[2]     = input[i].data[1] + csize;
1186         input[i].linesize[0] = c->width;
1187         input[i].linesize[1] =
1188         input[i].linesize[2] = c->width / 2;
1189
1190         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1191             pre_input = *pre_input_ptr;
1192
1193             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1194                 pre_input.f.data[0] += INPLACE_OFFSET;
1195                 pre_input.f.data[1] += INPLACE_OFFSET;
1196                 pre_input.f.data[2] += INPLACE_OFFSET;
1197             }
1198
1199             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1200                                  pre_input.f.data[0], pre_input.f.linesize[0],
1201                                  c->width,      c->height);
1202             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1203                                  pre_input.f.data[1], pre_input.f.linesize[1],
1204                                  c->width >> 1, c->height >> 1);
1205             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1206                                  pre_input.f.data[2], pre_input.f.linesize[2],
1207                                  c->width >> 1, c->height >> 1);
1208         }
1209     }
1210
1211     for (j = 0; j < s->max_b_frames + 1; j++) {
1212         int64_t rd = 0;
1213
1214         if (!s->input_picture[j])
1215             break;
1216
1217         c->error[0] = c->error[1] = c->error[2] = 0;
1218
1219         input[0].pict_type = AV_PICTURE_TYPE_I;
1220         input[0].quality   = 1 * FF_QP2LAMBDA;
1221
1222         out_size = encode_frame(c, &input[0]);
1223
1224         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1225
1226         for (i = 0; i < s->max_b_frames + 1; i++) {
1227             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1228
1229             input[i + 1].pict_type = is_p ?
1230                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1231             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1232
1233             out_size = encode_frame(c, &input[i + 1]);
1234
1235             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1236         }
1237
1238         /* get the delayed frames */
1239         while (out_size) {
1240             out_size = encode_frame(c, NULL);
1241             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1242         }
1243
1244         rd += c->error[0] + c->error[1] + c->error[2];
1245
1246         if (rd < best_rd) {
1247             best_rd = rd;
1248             best_b_count = j;
1249         }
1250     }
1251
1252     avcodec_close(c);
1253     av_freep(&c);
1254
1255     for (i = 0; i < s->max_b_frames + 2; i++) {
1256         av_freep(&input[i].data[0]);
1257     }
1258
1259     return best_b_count;
1260 }
1261
1262 static int select_input_picture(MpegEncContext *s)
1263 {
1264     int i;
1265
1266     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1267         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1268     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1269
1270     /* set next picture type & ordering */
1271     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1272         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1273             s->next_picture_ptr == NULL || s->intra_only) {
1274             s->reordered_input_picture[0] = s->input_picture[0];
1275             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1276             s->reordered_input_picture[0]->f.coded_picture_number =
1277                 s->coded_picture_number++;
1278         } else {
1279             int b_frames;
1280
1281             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1282                 if (s->picture_in_gop_number < s->gop_size &&
1283                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1284                     // FIXME check that te gop check above is +-1 correct
1285                     //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n",
1286                     //       s->input_picture[0]->f.data[0],
1287                     //       s->input_picture[0]->pts);
1288
1289                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1290                         for (i = 0; i < 4; i++)
1291                             s->input_picture[0]->f.data[i] = NULL;
1292                         s->input_picture[0]->f.type = 0;
1293                     } else {
1294                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1295                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1296
1297                         s->avctx->release_buffer(s->avctx,
1298                                                  &s->input_picture[0]->f);
1299                     }
1300
1301                     emms_c();
1302                     ff_vbv_update(s, 0);
1303
1304                     goto no_output_pic;
1305                 }
1306             }
1307
1308             if (s->flags & CODEC_FLAG_PASS2) {
1309                 for (i = 0; i < s->max_b_frames + 1; i++) {
1310                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1311
1312                     if (pict_num >= s->rc_context.num_entries)
1313                         break;
1314                     if (!s->input_picture[i]) {
1315                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1316                         break;
1317                     }
1318
1319                     s->input_picture[i]->f.pict_type =
1320                         s->rc_context.entry[pict_num].new_pict_type;
1321                 }
1322             }
1323
1324             if (s->avctx->b_frame_strategy == 0) {
1325                 b_frames = s->max_b_frames;
1326                 while (b_frames && !s->input_picture[b_frames])
1327                     b_frames--;
1328             } else if (s->avctx->b_frame_strategy == 1) {
1329                 for (i = 1; i < s->max_b_frames + 1; i++) {
1330                     if (s->input_picture[i] &&
1331                         s->input_picture[i]->b_frame_score == 0) {
1332                         s->input_picture[i]->b_frame_score =
1333                             get_intra_count(s,
1334                                             s->input_picture[i    ]->f.data[0],
1335                                             s->input_picture[i - 1]->f.data[0],
1336                                             s->linesize) + 1;
1337                     }
1338                 }
1339                 for (i = 0; i < s->max_b_frames + 1; i++) {
1340                     if (s->input_picture[i] == NULL ||
1341                         s->input_picture[i]->b_frame_score - 1 >
1342                             s->mb_num / s->avctx->b_sensitivity)
1343                         break;
1344                 }
1345
1346                 b_frames = FFMAX(0, i - 1);
1347
1348                 /* reset scores */
1349                 for (i = 0; i < b_frames + 1; i++) {
1350                     s->input_picture[i]->b_frame_score = 0;
1351                 }
1352             } else if (s->avctx->b_frame_strategy == 2) {
1353                 b_frames = estimate_best_b_count(s);
1354             } else {
1355                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1356                 b_frames = 0;
1357             }
1358
1359             emms_c();
1360             //static int b_count = 0;
1361             //b_count += b_frames;
1362             //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1363
1364             for (i = b_frames - 1; i >= 0; i--) {
1365                 int type = s->input_picture[i]->f.pict_type;
1366                 if (type && type != AV_PICTURE_TYPE_B)
1367                     b_frames = i;
1368             }
1369             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1370                 b_frames == s->max_b_frames) {
1371                 av_log(s->avctx, AV_LOG_ERROR,
1372                        "warning, too many b frames in a row\n");
1373             }
1374
1375             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1376                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1377                     s->gop_size > s->picture_in_gop_number) {
1378                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1379                 } else {
1380                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1381                         b_frames = 0;
1382                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1383                 }
1384             }
1385
1386             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1387                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1388                 b_frames--;
1389
1390             s->reordered_input_picture[0] = s->input_picture[b_frames];
1391             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1392                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1393             s->reordered_input_picture[0]->f.coded_picture_number =
1394                 s->coded_picture_number++;
1395             for (i = 0; i < b_frames; i++) {
1396                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1397                 s->reordered_input_picture[i + 1]->f.pict_type =
1398                     AV_PICTURE_TYPE_B;
1399                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1400                     s->coded_picture_number++;
1401             }
1402         }
1403     }
1404 no_output_pic:
1405     if (s->reordered_input_picture[0]) {
1406         s->reordered_input_picture[0]->f.reference =
1407            s->reordered_input_picture[0]->f.pict_type !=
1408                AV_PICTURE_TYPE_B ? 3 : 0;
1409
1410         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1411
1412         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1413             s->avctx->rc_buffer_size) {
1414             // input is a shared pix, so we can't modifiy it -> alloc a new
1415             // one & ensure that the shared one is reuseable
1416
1417             Picture *pic;
1418             int i = ff_find_unused_picture(s, 0);
1419             if (i < 0)
1420                 return i;
1421             pic = &s->picture[i];
1422
1423             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1424             if (ff_alloc_picture(s, pic, 0) < 0) {
1425                 return -1;
1426             }
1427
1428             /* mark us unused / free shared pic */
1429             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1430                 s->avctx->release_buffer(s->avctx,
1431                                          &s->reordered_input_picture[0]->f);
1432             for (i = 0; i < 4; i++)
1433                 s->reordered_input_picture[0]->f.data[i] = NULL;
1434             s->reordered_input_picture[0]->f.type = 0;
1435
1436             copy_picture_attributes(s, &pic->f,
1437                                     &s->reordered_input_picture[0]->f);
1438
1439             s->current_picture_ptr = pic;
1440         } else {
1441             // input is not a shared pix -> reuse buffer for current_pix
1442
1443             assert(s->reordered_input_picture[0]->f.type ==
1444                        FF_BUFFER_TYPE_USER ||
1445                    s->reordered_input_picture[0]->f.type ==
1446                        FF_BUFFER_TYPE_INTERNAL);
1447
1448             s->current_picture_ptr = s->reordered_input_picture[0];
1449             for (i = 0; i < 4; i++) {
1450                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1451             }
1452         }
1453         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1454
1455         s->picture_number = s->new_picture.f.display_picture_number;
1456         //printf("dpn:%d\n", s->picture_number);
1457     } else {
1458         memset(&s->new_picture, 0, sizeof(Picture));
1459     }
1460     return 0;
1461 }
1462
1463 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1464                           AVFrame *pic_arg, int *got_packet)
1465 {
1466     MpegEncContext *s = avctx->priv_data;
1467     int i, stuffing_count, ret;
1468     int context_count = s->slice_context_count;
1469
1470     s->picture_in_gop_number++;
1471
1472     if (load_input_picture(s, pic_arg) < 0)
1473         return -1;
1474
1475     if (select_input_picture(s) < 0) {
1476         return -1;
1477     }
1478
1479     /* output? */
1480     if (s->new_picture.f.data[0]) {
1481         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1482             return ret;
1483         if (s->mb_info) {
1484             s->mb_info_ptr = av_packet_new_side_data(pkt,
1485                                  AV_PKT_DATA_H263_MB_INFO,
1486                                  s->mb_width*s->mb_height*12);
1487             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1488         }
1489
1490         for (i = 0; i < context_count; i++) {
1491             int start_y = s->thread_context[i]->start_mb_y;
1492             int   end_y = s->thread_context[i]->  end_mb_y;
1493             int h       = s->mb_height;
1494             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1495             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1496
1497             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1498         }
1499
1500         s->pict_type = s->new_picture.f.pict_type;
1501         //emms_c();
1502         //printf("qs:%f %f %d\n", s->new_picture.quality,
1503         //       s->current_picture.quality, s->qscale);
1504         ff_MPV_frame_start(s, avctx);
1505 vbv_retry:
1506         if (encode_picture(s, s->picture_number) < 0)
1507             return -1;
1508
1509         avctx->header_bits = s->header_bits;
1510         avctx->mv_bits     = s->mv_bits;
1511         avctx->misc_bits   = s->misc_bits;
1512         avctx->i_tex_bits  = s->i_tex_bits;
1513         avctx->p_tex_bits  = s->p_tex_bits;
1514         avctx->i_count     = s->i_count;
1515         // FIXME f/b_count in avctx
1516         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1517         avctx->skip_count  = s->skip_count;
1518
1519         ff_MPV_frame_end(s);
1520
1521         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1522             ff_mjpeg_encode_picture_trailer(s);
1523
1524         if (avctx->rc_buffer_size) {
1525             RateControlContext *rcc = &s->rc_context;
1526             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1527
1528             if (put_bits_count(&s->pb) > max_size &&
1529                 s->lambda < s->avctx->lmax) {
1530                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1531                                        (s->qscale + 1) / s->qscale);
1532                 if (s->adaptive_quant) {
1533                     int i;
1534                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1535                         s->lambda_table[i] =
1536                             FFMAX(s->lambda_table[i] + 1,
1537                                   s->lambda_table[i] * (s->qscale + 1) /
1538                                   s->qscale);
1539                 }
1540                 s->mb_skipped = 0;        // done in MPV_frame_start()
1541                 // done in encode_picture() so we must undo it
1542                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1543                     if (s->flipflop_rounding          ||
1544                         s->codec_id == AV_CODEC_ID_H263P ||
1545                         s->codec_id == AV_CODEC_ID_MPEG4)
1546                         s->no_rounding ^= 1;
1547                 }
1548                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1549                     s->time_base       = s->last_time_base;
1550                     s->last_non_b_time = s->time - s->pp_time;
1551                 }
1552                 //av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
1553                 for (i = 0; i < context_count; i++) {
1554                     PutBitContext *pb = &s->thread_context[i]->pb;
1555                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1556                 }
1557                 goto vbv_retry;
1558             }
1559
1560             assert(s->avctx->rc_max_rate);
1561         }
1562
1563         if (s->flags & CODEC_FLAG_PASS1)
1564             ff_write_pass1_stats(s);
1565
1566         for (i = 0; i < 4; i++) {
1567             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1568             avctx->error[i] += s->current_picture_ptr->f.error[i];
1569         }
1570
1571         if (s->flags & CODEC_FLAG_PASS1)
1572             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1573                    avctx->i_tex_bits + avctx->p_tex_bits ==
1574                        put_bits_count(&s->pb));
1575         flush_put_bits(&s->pb);
1576         s->frame_bits  = put_bits_count(&s->pb);
1577
1578         stuffing_count = ff_vbv_update(s, s->frame_bits);
1579         if (stuffing_count) {
1580             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1581                     stuffing_count + 50) {
1582                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1583                 return -1;
1584             }
1585
1586             switch (s->codec_id) {
1587             case AV_CODEC_ID_MPEG1VIDEO:
1588             case AV_CODEC_ID_MPEG2VIDEO:
1589                 while (stuffing_count--) {
1590                     put_bits(&s->pb, 8, 0);
1591                 }
1592             break;
1593             case AV_CODEC_ID_MPEG4:
1594                 put_bits(&s->pb, 16, 0);
1595                 put_bits(&s->pb, 16, 0x1C3);
1596                 stuffing_count -= 4;
1597                 while (stuffing_count--) {
1598                     put_bits(&s->pb, 8, 0xFF);
1599                 }
1600             break;
1601             default:
1602                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1603             }
1604             flush_put_bits(&s->pb);
1605             s->frame_bits  = put_bits_count(&s->pb);
1606         }
1607
1608         /* update mpeg1/2 vbv_delay for CBR */
1609         if (s->avctx->rc_max_rate                          &&
1610             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1611             s->out_format == FMT_MPEG1                     &&
1612             90000LL * (avctx->rc_buffer_size - 1) <=
1613                 s->avctx->rc_max_rate * 0xFFFFLL) {
1614             int vbv_delay, min_delay;
1615             double inbits  = s->avctx->rc_max_rate *
1616                              av_q2d(s->avctx->time_base);
1617             int    minbits = s->frame_bits - 8 *
1618                              (s->vbv_delay_ptr - s->pb.buf - 1);
1619             double bits    = s->rc_context.buffer_index + minbits - inbits;
1620
1621             if (bits < 0)
1622                 av_log(s->avctx, AV_LOG_ERROR,
1623                        "Internal error, negative bits\n");
1624
1625             assert(s->repeat_first_field == 0);
1626
1627             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1628             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1629                         s->avctx->rc_max_rate;
1630
1631             vbv_delay = FFMAX(vbv_delay, min_delay);
1632
1633             av_assert0(vbv_delay < 0xFFFF);
1634
1635             s->vbv_delay_ptr[0] &= 0xF8;
1636             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1637             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1638             s->vbv_delay_ptr[2] &= 0x07;
1639             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1640             avctx->vbv_delay     = vbv_delay * 300;
1641         }
1642         s->total_bits     += s->frame_bits;
1643         avctx->frame_bits  = s->frame_bits;
1644
1645         pkt->pts = s->current_picture.f.pts;
1646         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1647             if (!s->current_picture.f.coded_picture_number)
1648                 pkt->dts = pkt->pts - s->dts_delta;
1649             else
1650                 pkt->dts = s->reordered_pts;
1651             s->reordered_pts = pkt->pts;
1652         } else
1653             pkt->dts = pkt->pts;
1654         if (s->current_picture.f.key_frame)
1655             pkt->flags |= AV_PKT_FLAG_KEY;
1656         if (s->mb_info)
1657             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1658     } else {
1659         s->frame_bits = 0;
1660     }
1661     assert((s->frame_bits & 7) == 0);
1662
1663     pkt->size = s->frame_bits / 8;
1664     *got_packet = !!pkt->size;
1665     return 0;
1666 }
1667
1668 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1669                                                 int n, int threshold)
1670 {
1671     static const char tab[64] = {
1672         3, 2, 2, 1, 1, 1, 1, 1,
1673         1, 1, 1, 1, 1, 1, 1, 1,
1674         1, 1, 1, 1, 1, 1, 1, 1,
1675         0, 0, 0, 0, 0, 0, 0, 0,
1676         0, 0, 0, 0, 0, 0, 0, 0,
1677         0, 0, 0, 0, 0, 0, 0, 0,
1678         0, 0, 0, 0, 0, 0, 0, 0,
1679         0, 0, 0, 0, 0, 0, 0, 0
1680     };
1681     int score = 0;
1682     int run = 0;
1683     int i;
1684     DCTELEM *block = s->block[n];
1685     const int last_index = s->block_last_index[n];
1686     int skip_dc;
1687
1688     if (threshold < 0) {
1689         skip_dc = 0;
1690         threshold = -threshold;
1691     } else
1692         skip_dc = 1;
1693
1694     /* Are all we could set to zero already zero? */
1695     if (last_index <= skip_dc - 1)
1696         return;
1697
1698     for (i = 0; i <= last_index; i++) {
1699         const int j = s->intra_scantable.permutated[i];
1700         const int level = FFABS(block[j]);
1701         if (level == 1) {
1702             if (skip_dc && i == 0)
1703                 continue;
1704             score += tab[run];
1705             run = 0;
1706         } else if (level > 1) {
1707             return;
1708         } else {
1709             run++;
1710         }
1711     }
1712     if (score >= threshold)
1713         return;
1714     for (i = skip_dc; i <= last_index; i++) {
1715         const int j = s->intra_scantable.permutated[i];
1716         block[j] = 0;
1717     }
1718     if (block[0])
1719         s->block_last_index[n] = 0;
1720     else
1721         s->block_last_index[n] = -1;
1722 }
1723
1724 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1725                                int last_index)
1726 {
1727     int i;
1728     const int maxlevel = s->max_qcoeff;
1729     const int minlevel = s->min_qcoeff;
1730     int overflow = 0;
1731
1732     if (s->mb_intra) {
1733         i = 1; // skip clipping of intra dc
1734     } else
1735         i = 0;
1736
1737     for (; i <= last_index; i++) {
1738         const int j = s->intra_scantable.permutated[i];
1739         int level = block[j];
1740
1741         if (level > maxlevel) {
1742             level = maxlevel;
1743             overflow++;
1744         } else if (level < minlevel) {
1745             level = minlevel;
1746             overflow++;
1747         }
1748
1749         block[j] = level;
1750     }
1751
1752     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1753         av_log(s->avctx, AV_LOG_INFO,
1754                "warning, clipping %d dct coefficients to %d..%d\n",
1755                overflow, minlevel, maxlevel);
1756 }
1757
/**
 * Compute a per-pixel weight for an 8x8 block from the local pixel variation.
 *
 * For each pixel the (up to) 3x3 neighbourhood, clipped to the 8x8 block, is
 * scanned; the weight is 36 * sqrt(count * sum(v^2) - sum(v)^2) / count,
 * where count is the number of neighbourhood samples.
 *
 * @param weight output, 64 entries stored row by row (x + 8 * y)
 * @param ptr    top-left pixel of the 8x8 source block
 * @param stride distance in bytes between two source rows
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int px, py;

    // FIXME optimize
    for (py = 0; py < 8; py++) {
        const int top    = FFMAX(py - 1, 0);
        const int bottom = FFMIN(8, py + 2);    /* exclusive */

        for (px = 0; px < 8; px++) {
            const int left  = FFMAX(px - 1, 0);
            const int right = FFMIN(8, px + 2); /* exclusive */
            int total   = 0;
            int squares = 0;
            int samples = 0;
            int nx, ny;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *row = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    int v = row[nx];

                    total   += v;
                    squares += v * v;
                    samples++;
                }
            }
            weight[px + 8 * py] =
                (36 * ff_sqrt(samples * squares - total * total)) / samples;
        }
    }
}
1781
1782 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1783                                                 int motion_x, int motion_y,
1784                                                 int mb_block_height,
1785                                                 int mb_block_count)
1786 {
1787     int16_t weight[8][64];
1788     DCTELEM orig[8][64];
1789     const int mb_x = s->mb_x;
1790     const int mb_y = s->mb_y;
1791     int i;
1792     int skip_dct[8];
1793     int dct_offset = s->linesize * 8; // default for progressive frames
1794     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1795     int wrap_y, wrap_c;
1796
1797     for (i = 0; i < mb_block_count; i++)
1798         skip_dct[i] = s->skipdct;
1799
1800     if (s->adaptive_quant) {
1801         const int last_qp = s->qscale;
1802         const int mb_xy = mb_x + mb_y * s->mb_stride;
1803
1804         s->lambda = s->lambda_table[mb_xy];
1805         update_qscale(s);
1806
1807         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1808             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1809             s->dquant = s->qscale - last_qp;
1810
1811             if (s->out_format == FMT_H263) {
1812                 s->dquant = av_clip(s->dquant, -2, 2);
1813
1814                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1815                     if (!s->mb_intra) {
1816                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1817                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1818                                 s->dquant = 0;
1819                         }
1820                         if (s->mv_type == MV_TYPE_8X8)
1821                             s->dquant = 0;
1822                     }
1823                 }
1824             }
1825         }
1826         ff_set_qscale(s, last_qp + s->dquant);
1827     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1828         ff_set_qscale(s, s->qscale + s->dquant);
1829
1830     wrap_y = s->linesize;
1831     wrap_c = s->uvlinesize;
1832     ptr_y  = s->new_picture.f.data[0] +
1833              (mb_y * 16 * wrap_y)              + mb_x * 16;
1834     ptr_cb = s->new_picture.f.data[1] +
1835              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1836     ptr_cr = s->new_picture.f.data[2] +
1837              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1838
1839     if((mb_x*16+16 > s->width || mb_y*16+16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
1840         uint8_t *ebuf = s->edge_emu_buffer + 32;
1841         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1842                                 mb_y * 16, s->width, s->height);
1843         ptr_y = ebuf;
1844         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1845                                 mb_block_height, mb_x * 8, mb_y * 8,
1846                                 (s->width+1) >> 1, (s->height+1) >> 1);
1847         ptr_cb = ebuf + 18 * wrap_y;
1848         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1849                                 mb_block_height, mb_x * 8, mb_y * 8,
1850                                 (s->width+1) >> 1, (s->height+1) >> 1);
1851         ptr_cr = ebuf + 18 * wrap_y + 8;
1852     }
1853
1854     if (s->mb_intra) {
1855         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1856             int progressive_score, interlaced_score;
1857
1858             s->interlaced_dct = 0;
1859             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1860                                                     NULL, wrap_y, 8) +
1861                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1862                                                     NULL, wrap_y, 8) - 400;
1863
1864             if (progressive_score > 0) {
1865                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1866                                                        NULL, wrap_y * 2, 8) +
1867                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1868                                                        NULL, wrap_y * 2, 8);
1869                 if (progressive_score > interlaced_score) {
1870                     s->interlaced_dct = 1;
1871
1872                     dct_offset = wrap_y;
1873                     wrap_y <<= 1;
1874                     if (s->chroma_format == CHROMA_422)
1875                         wrap_c <<= 1;
1876                 }
1877             }
1878         }
1879
1880         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1881         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1882         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1883         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1884
1885         if (s->flags & CODEC_FLAG_GRAY) {
1886             skip_dct[4] = 1;
1887             skip_dct[5] = 1;
1888         } else {
1889             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1890             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1891             if (!s->chroma_y_shift) { /* 422 */
1892                 s->dsp.get_pixels(s->block[6],
1893                                   ptr_cb + (dct_offset >> 1), wrap_c);
1894                 s->dsp.get_pixels(s->block[7],
1895                                   ptr_cr + (dct_offset >> 1), wrap_c);
1896             }
1897         }
1898     } else {
1899         op_pixels_func (*op_pix)[4];
1900         qpel_mc_func (*op_qpix)[16];
1901         uint8_t *dest_y, *dest_cb, *dest_cr;
1902
1903         dest_y  = s->dest[0];
1904         dest_cb = s->dest[1];
1905         dest_cr = s->dest[2];
1906
1907         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1908             op_pix  = s->dsp.put_pixels_tab;
1909             op_qpix = s->dsp.put_qpel_pixels_tab;
1910         } else {
1911             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1912             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1913         }
1914
1915         if (s->mv_dir & MV_DIR_FORWARD) {
1916             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1917                           s->last_picture.f.data,
1918                           op_pix, op_qpix);
1919             op_pix  = s->dsp.avg_pixels_tab;
1920             op_qpix = s->dsp.avg_qpel_pixels_tab;
1921         }
1922         if (s->mv_dir & MV_DIR_BACKWARD) {
1923             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1924                           s->next_picture.f.data,
1925                           op_pix, op_qpix);
1926         }
1927
1928         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1929             int progressive_score, interlaced_score;
1930
1931             s->interlaced_dct = 0;
1932             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1933                                                     ptr_y,              wrap_y,
1934                                                     8) +
1935                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1936                                                     ptr_y + wrap_y * 8, wrap_y,
1937                                                     8) - 400;
1938
1939             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1940                 progressive_score -= 400;
1941
1942             if (progressive_score > 0) {
1943                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1944                                                        ptr_y,
1945                                                        wrap_y * 2, 8) +
1946                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1947                                                        ptr_y + wrap_y,
1948                                                        wrap_y * 2, 8);
1949
1950                 if (progressive_score > interlaced_score) {
1951                     s->interlaced_dct = 1;
1952
1953                     dct_offset = wrap_y;
1954                     wrap_y <<= 1;
1955                     if (s->chroma_format == CHROMA_422)
1956                         wrap_c <<= 1;
1957                 }
1958             }
1959         }
1960
1961         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1962         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1963         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1964                            dest_y + dct_offset, wrap_y);
1965         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1966                            dest_y + dct_offset + 8, wrap_y);
1967
1968         if (s->flags & CODEC_FLAG_GRAY) {
1969             skip_dct[4] = 1;
1970             skip_dct[5] = 1;
1971         } else {
1972             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1973             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1974             if (!s->chroma_y_shift) { /* 422 */
1975                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1976                                    dest_cb + (dct_offset >> 1), wrap_c);
1977                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1978                                    dest_cr + (dct_offset >> 1), wrap_c);
1979             }
1980         }
1981         /* pre quantization */
1982         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1983                 2 * s->qscale * s->qscale) {
1984             // FIXME optimize
1985             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1986                               wrap_y, 8) < 20 * s->qscale)
1987                 skip_dct[0] = 1;
1988             if (s->dsp.sad[1](NULL, ptr_y + 8,
1989                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1990                 skip_dct[1] = 1;
1991             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1992                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1993                 skip_dct[2] = 1;
1994             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1995                               dest_y + dct_offset + 8,
1996                               wrap_y, 8) < 20 * s->qscale)
1997                 skip_dct[3] = 1;
1998             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1999                               wrap_c, 8) < 20 * s->qscale)
2000                 skip_dct[4] = 1;
2001             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2002                               wrap_c, 8) < 20 * s->qscale)
2003                 skip_dct[5] = 1;
2004             if (!s->chroma_y_shift) { /* 422 */
2005                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2006                                   dest_cb + (dct_offset >> 1),
2007                                   wrap_c, 8) < 20 * s->qscale)
2008                     skip_dct[6] = 1;
2009                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2010                                   dest_cr + (dct_offset >> 1),
2011                                   wrap_c, 8) < 20 * s->qscale)
2012                     skip_dct[7] = 1;
2013             }
2014         }
2015     }
2016
2017     if (s->quantizer_noise_shaping) {
2018         if (!skip_dct[0])
2019             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2020         if (!skip_dct[1])
2021             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2022         if (!skip_dct[2])
2023             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2024         if (!skip_dct[3])
2025             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2026         if (!skip_dct[4])
2027             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2028         if (!skip_dct[5])
2029             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2030         if (!s->chroma_y_shift) { /* 422 */
2031             if (!skip_dct[6])
2032                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2033                                   wrap_c);
2034             if (!skip_dct[7])
2035                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2036                                   wrap_c);
2037         }
2038         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
2039     }
2040
2041     /* DCT & quantize */
2042     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2043     {
2044         for (i = 0; i < mb_block_count; i++) {
2045             if (!skip_dct[i]) {
2046                 int overflow;
2047                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2048                 // FIXME we could decide to change to quantizer instead of
2049                 // clipping
2050                 // JS: I don't think that would be a good idea it could lower
2051                 //     quality instead of improve it. Just INTRADC clipping
2052                 //     deserves changes in quantizer
2053                 if (overflow)
2054                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2055             } else
2056                 s->block_last_index[i] = -1;
2057         }
2058         if (s->quantizer_noise_shaping) {
2059             for (i = 0; i < mb_block_count; i++) {
2060                 if (!skip_dct[i]) {
2061                     s->block_last_index[i] =
2062                         dct_quantize_refine(s, s->block[i], weight[i],
2063                                             orig[i], i, s->qscale);
2064                 }
2065             }
2066         }
2067
2068         if (s->luma_elim_threshold && !s->mb_intra)
2069             for (i = 0; i < 4; i++)
2070                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2071         if (s->chroma_elim_threshold && !s->mb_intra)
2072             for (i = 4; i < mb_block_count; i++)
2073                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2074
2075         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2076             for (i = 0; i < mb_block_count; i++) {
2077                 if (s->block_last_index[i] == -1)
2078                     s->coded_score[i] = INT_MAX / 256;
2079             }
2080         }
2081     }
2082
2083     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2084         s->block_last_index[4] =
2085         s->block_last_index[5] = 0;
2086         s->block[4][0] =
2087         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2088     }
2089
2090     // non c quantize code returns incorrect block_last_index FIXME
2091     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2092         for (i = 0; i < mb_block_count; i++) {
2093             int j;
2094             if (s->block_last_index[i] > 0) {
2095                 for (j = 63; j > 0; j--) {
2096                     if (s->block[i][s->intra_scantable.permutated[j]])
2097                         break;
2098                 }
2099                 s->block_last_index[i] = j;
2100             }
2101         }
2102     }
2103
2104     /* huffman encode */
2105     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2106     case AV_CODEC_ID_MPEG1VIDEO:
2107     case AV_CODEC_ID_MPEG2VIDEO:
2108         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2109             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2110         break;
2111     case AV_CODEC_ID_MPEG4:
2112         if (CONFIG_MPEG4_ENCODER)
2113             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2114         break;
2115     case AV_CODEC_ID_MSMPEG4V2:
2116     case AV_CODEC_ID_MSMPEG4V3:
2117     case AV_CODEC_ID_WMV1:
2118         if (CONFIG_MSMPEG4_ENCODER)
2119             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2120         break;
2121     case AV_CODEC_ID_WMV2:
2122         if (CONFIG_WMV2_ENCODER)
2123             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2124         break;
2125     case AV_CODEC_ID_H261:
2126         if (CONFIG_H261_ENCODER)
2127             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2128         break;
2129     case AV_CODEC_ID_H263:
2130     case AV_CODEC_ID_H263P:
2131     case AV_CODEC_ID_FLV1:
2132     case AV_CODEC_ID_RV10:
2133     case AV_CODEC_ID_RV20:
2134         if (CONFIG_H263_ENCODER)
2135             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2136         break;
2137     case AV_CODEC_ID_MJPEG:
2138     case AV_CODEC_ID_AMV:
2139         if (CONFIG_MJPEG_ENCODER)
2140             ff_mjpeg_encode_mb(s, s->block);
2141         break;
2142     default:
2143         av_assert1(0);
2144     }
2145 }
2146
2147 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2148 {
2149     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2150     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2151 }
2152
2153 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2154     int i;
2155
2156     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2157
2158     /* mpeg1 */
2159     d->mb_skip_run= s->mb_skip_run;
2160     for(i=0; i<3; i++)
2161         d->last_dc[i] = s->last_dc[i];
2162
2163     /* statistics */
2164     d->mv_bits= s->mv_bits;
2165     d->i_tex_bits= s->i_tex_bits;
2166     d->p_tex_bits= s->p_tex_bits;
2167     d->i_count= s->i_count;
2168     d->f_count= s->f_count;
2169     d->b_count= s->b_count;
2170     d->skip_count= s->skip_count;
2171     d->misc_bits= s->misc_bits;
2172     d->last_bits= 0;
2173
2174     d->mb_skipped= 0;
2175     d->qscale= s->qscale;
2176     d->dquant= s->dquant;
2177
2178     d->esc3_level_length= s->esc3_level_length;
2179 }
2180
2181 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2182     int i;
2183
2184     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2185     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2186
2187     /* mpeg1 */
2188     d->mb_skip_run= s->mb_skip_run;
2189     for(i=0; i<3; i++)
2190         d->last_dc[i] = s->last_dc[i];
2191
2192     /* statistics */
2193     d->mv_bits= s->mv_bits;
2194     d->i_tex_bits= s->i_tex_bits;
2195     d->p_tex_bits= s->p_tex_bits;
2196     d->i_count= s->i_count;
2197     d->f_count= s->f_count;
2198     d->b_count= s->b_count;
2199     d->skip_count= s->skip_count;
2200     d->misc_bits= s->misc_bits;
2201
2202     d->mb_intra= s->mb_intra;
2203     d->mb_skipped= s->mb_skipped;
2204     d->mv_type= s->mv_type;
2205     d->mv_dir= s->mv_dir;
2206     d->pb= s->pb;
2207     if(s->data_partitioning){
2208         d->pb2= s->pb2;
2209         d->tex_pb= s->tex_pb;
2210     }
2211     d->block= s->block;
2212     for(i=0; i<8; i++)
2213         d->block_last_index[i]= s->block_last_index[i];
2214     d->interlaced_dct= s->interlaced_dct;
2215     d->qscale= s->qscale;
2216
2217     d->esc3_level_length= s->esc3_level_length;
2218 }
2219
2220 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2221                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2222                            int *dmin, int *next_block, int motion_x, int motion_y)
2223 {
2224     int score;
2225     uint8_t *dest_backup[3];
2226
2227     copy_context_before_encode(s, backup, type);
2228
2229     s->block= s->blocks[*next_block];
2230     s->pb= pb[*next_block];
2231     if(s->data_partitioning){
2232         s->pb2   = pb2   [*next_block];
2233         s->tex_pb= tex_pb[*next_block];
2234     }
2235
2236     if(*next_block){
2237         memcpy(dest_backup, s->dest, sizeof(s->dest));
2238         s->dest[0] = s->rd_scratchpad;
2239         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2240         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2241         assert(s->linesize >= 32); //FIXME
2242     }
2243
2244     encode_mb(s, motion_x, motion_y);
2245
2246     score= put_bits_count(&s->pb);
2247     if(s->data_partitioning){
2248         score+= put_bits_count(&s->pb2);
2249         score+= put_bits_count(&s->tex_pb);
2250     }
2251
2252     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2253         ff_MPV_decode_mb(s, s->block);
2254
2255         score *= s->lambda2;
2256         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2257     }
2258
2259     if(*next_block){
2260         memcpy(s->dest, dest_backup, sizeof(s->dest));
2261     }
2262
2263     if(score<*dmin){
2264         *dmin= score;
2265         *next_block^=1;
2266
2267         copy_context_after_encode(best, s, type);
2268     }
2269 }
2270
2271 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2272     uint32_t *sq = ff_squareTbl + 256;
2273     int acc=0;
2274     int x,y;
2275
2276     if(w==16 && h==16)
2277         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2278     else if(w==8 && h==8)
2279         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2280
2281     for(y=0; y<h; y++){
2282         for(x=0; x<w; x++){
2283             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2284         }
2285     }
2286
2287     av_assert2(acc>=0);
2288
2289     return acc;
2290 }
2291
2292 static int sse_mb(MpegEncContext *s){
2293     int w= 16;
2294     int h= 16;
2295
2296     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2297     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2298
2299     if(w==16 && h==16)
2300       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2301         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2302                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2303                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2304       }else{
2305         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2306                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2307                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2308       }
2309     else
2310         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2311                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2312                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2313 }
2314
2315 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2316     MpegEncContext *s= *(void**)arg;
2317
2318
2319     s->me.pre_pass=1;
2320     s->me.dia_size= s->avctx->pre_dia_size;
2321     s->first_slice_line=1;
2322     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2323         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2324             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2325         }
2326         s->first_slice_line=0;
2327     }
2328
2329     s->me.pre_pass=0;
2330
2331     return 0;
2332 }
2333
2334 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2335     MpegEncContext *s= *(void**)arg;
2336
2337     ff_check_alignment();
2338
2339     s->me.dia_size= s->avctx->dia_size;
2340     s->first_slice_line=1;
2341     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2342         s->mb_x=0; //for block init below
2343         ff_init_block_index(s);
2344         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2345             s->block_index[0]+=2;
2346             s->block_index[1]+=2;
2347             s->block_index[2]+=2;
2348             s->block_index[3]+=2;
2349
2350             /* compute motion vector & mb_type and store in context */
2351             if(s->pict_type==AV_PICTURE_TYPE_B)
2352                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2353             else
2354                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2355         }
2356         s->first_slice_line=0;
2357     }
2358     return 0;
2359 }
2360
2361 static int mb_var_thread(AVCodecContext *c, void *arg){
2362     MpegEncContext *s= *(void**)arg;
2363     int mb_x, mb_y;
2364
2365     ff_check_alignment();
2366
2367     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2368         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2369             int xx = mb_x * 16;
2370             int yy = mb_y * 16;
2371             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2372             int varc;
2373             int sum = s->dsp.pix_sum(pix, s->linesize);
2374
2375             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2376
2377             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2378             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2379             s->me.mb_var_sum_temp    += varc;
2380         }
2381     }
2382     return 0;
2383 }
2384
2385 static void write_slice_end(MpegEncContext *s){
2386     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2387         if(s->partitioned_frame){
2388             ff_mpeg4_merge_partitions(s);
2389         }
2390
2391         ff_mpeg4_stuffing(&s->pb);
2392     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2393         ff_mjpeg_encode_stuffing(s);
2394     }
2395
2396     avpriv_align_put_bits(&s->pb);
2397     flush_put_bits(&s->pb);
2398
2399     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2400         s->misc_bits+= get_bits_diff(s);
2401 }
2402
2403 static void write_mb_info(MpegEncContext *s)
2404 {
2405     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2406     int offset = put_bits_count(&s->pb);
2407     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2408     int gobn = s->mb_y / s->gob_index;
2409     int pred_x, pred_y;
2410     if (CONFIG_H263_ENCODER)
2411         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2412     bytestream_put_le32(&ptr, offset);
2413     bytestream_put_byte(&ptr, s->qscale);
2414     bytestream_put_byte(&ptr, gobn);
2415     bytestream_put_le16(&ptr, mba);
2416     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2417     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2418     /* 4MV not implemented */
2419     bytestream_put_byte(&ptr, 0); /* hmv2 */
2420     bytestream_put_byte(&ptr, 0); /* vmv2 */
2421 }
2422
2423 static void update_mb_info(MpegEncContext *s, int startcode)
2424 {
2425     if (!s->mb_info)
2426         return;
2427     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2428         s->mb_info_size += 12;
2429         s->prev_mb_info = s->last_mb_info;
2430     }
2431     if (startcode) {
2432         s->prev_mb_info = put_bits_count(&s->pb)/8;
2433         /* This might have incremented mb_info_size above, and we return without
2434          * actually writing any info into that slot yet. But in that case,
2435          * this will be called again at the start of the after writing the
2436          * start code, actually writing the mb info. */
2437         return;
2438     }
2439
2440     s->last_mb_info = put_bits_count(&s->pb)/8;
2441     if (!s->mb_info_size)
2442         s->mb_info_size += 12;
2443     write_mb_info(s);
2444 }
2445
2446 static int encode_thread(AVCodecContext *c, void *arg){
2447     MpegEncContext *s= *(void**)arg;
2448     int mb_x, mb_y, pdif = 0;
2449     int chr_h= 16>>s->chroma_y_shift;
2450     int i, j;
2451     MpegEncContext best_s, backup_s;
2452     uint8_t bit_buf[2][MAX_MB_BYTES];
2453     uint8_t bit_buf2[2][MAX_MB_BYTES];
2454     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2455     PutBitContext pb[2], pb2[2], tex_pb[2];
2456 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2457
2458     ff_check_alignment();
2459
2460     for(i=0; i<2; i++){
2461         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2462         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2463         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2464     }
2465
2466     s->last_bits= put_bits_count(&s->pb);
2467     s->mv_bits=0;
2468     s->misc_bits=0;
2469     s->i_tex_bits=0;
2470     s->p_tex_bits=0;
2471     s->i_count=0;
2472     s->f_count=0;
2473     s->b_count=0;
2474     s->skip_count=0;
2475
2476     for(i=0; i<3; i++){
2477         /* init last dc values */
2478         /* note: quant matrix value (8) is implied here */
2479         s->last_dc[i] = 128 << s->intra_dc_precision;
2480
2481         s->current_picture.f.error[i] = 0;
2482     }
2483     if(s->codec_id==AV_CODEC_ID_AMV){
2484         s->last_dc[0] = 128*8/13;
2485         s->last_dc[1] = 128*8/14;
2486         s->last_dc[2] = 128*8/14;
2487     }
2488     s->mb_skip_run = 0;
2489     memset(s->last_mv, 0, sizeof(s->last_mv));
2490
2491     s->last_mv_dir = 0;
2492
2493     switch(s->codec_id){
2494     case AV_CODEC_ID_H263:
2495     case AV_CODEC_ID_H263P:
2496     case AV_CODEC_ID_FLV1:
2497         if (CONFIG_H263_ENCODER)
2498             s->gob_index = ff_h263_get_gob_height(s);
2499         break;
2500     case AV_CODEC_ID_MPEG4:
2501         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2502             ff_mpeg4_init_partitions(s);
2503         break;
2504     }
2505
2506     s->resync_mb_x=0;
2507     s->resync_mb_y=0;
2508     s->first_slice_line = 1;
2509     s->ptr_lastgob = s->pb.buf;
2510     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2511 //    printf("row %d at %X\n", s->mb_y, (int)s);
2512         s->mb_x=0;
2513         s->mb_y= mb_y;
2514
2515         ff_set_qscale(s, s->qscale);
2516         ff_init_block_index(s);
2517
2518         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2519             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2520             int mb_type= s->mb_type[xy];
2521 //            int d;
2522             int dmin= INT_MAX;
2523             int dir;
2524
2525             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2526                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2527                 return -1;
2528             }
2529             if(s->data_partitioning){
2530                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2531                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2532                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2533                     return -1;
2534                 }
2535             }
2536
2537             s->mb_x = mb_x;
2538             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2539             ff_update_block_index(s);
2540
2541             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2542                 ff_h261_reorder_mb_index(s);
2543                 xy= s->mb_y*s->mb_stride + s->mb_x;
2544                 mb_type= s->mb_type[xy];
2545             }
2546
2547             /* write gob / video packet header  */
2548             if(s->rtp_mode){
2549                 int current_packet_size, is_gob_start;
2550
2551                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2552
2553                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2554
2555                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2556
2557                 switch(s->codec_id){
2558                 case AV_CODEC_ID_H263:
2559                 case AV_CODEC_ID_H263P:
2560                     if(!s->h263_slice_structured)
2561                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2562                     break;
2563                 case AV_CODEC_ID_MPEG2VIDEO:
2564                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2565                 case AV_CODEC_ID_MPEG1VIDEO:
2566                     if(s->mb_skip_run) is_gob_start=0;
2567                     break;
2568                 case AV_CODEC_ID_MJPEG:
2569                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2570                     break;
2571                 }
2572
2573                 if(is_gob_start){
2574                     if(s->start_mb_y != mb_y || mb_x!=0){
2575                         write_slice_end(s);
2576                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2577                             ff_mpeg4_init_partitions(s);
2578                         }
2579                     }
2580
2581                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2582                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2583
2584                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2585                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2586                         int d= 100 / s->avctx->error_rate;
2587                         if(r % d == 0){
2588                             current_packet_size=0;
2589                             s->pb.buf_ptr= s->ptr_lastgob;
2590                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2591                         }
2592                     }
2593
2594                     if (s->avctx->rtp_callback){
2595                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2596                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2597                     }
2598                     update_mb_info(s, 1);
2599
2600                     switch(s->codec_id){
2601                     case AV_CODEC_ID_MPEG4:
2602                         if (CONFIG_MPEG4_ENCODER) {
2603                             ff_mpeg4_encode_video_packet_header(s);
2604                             ff_mpeg4_clean_buffers(s);
2605                         }
2606                     break;
2607                     case AV_CODEC_ID_MPEG1VIDEO:
2608                     case AV_CODEC_ID_MPEG2VIDEO:
2609                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2610                             ff_mpeg1_encode_slice_header(s);
2611                             ff_mpeg1_clean_buffers(s);
2612                         }
2613                     break;
2614                     case AV_CODEC_ID_H263:
2615                     case AV_CODEC_ID_H263P:
2616                         if (CONFIG_H263_ENCODER)
2617                             ff_h263_encode_gob_header(s, mb_y);
2618                     break;
2619                     }
2620
2621                     if(s->flags&CODEC_FLAG_PASS1){
2622                         int bits= put_bits_count(&s->pb);
2623                         s->misc_bits+= bits - s->last_bits;
2624                         s->last_bits= bits;
2625                     }
2626
2627                     s->ptr_lastgob += current_packet_size;
2628                     s->first_slice_line=1;
2629                     s->resync_mb_x=mb_x;
2630                     s->resync_mb_y=mb_y;
2631                 }
2632             }
2633
2634             if(  (s->resync_mb_x   == s->mb_x)
2635                && s->resync_mb_y+1 == s->mb_y){
2636                 s->first_slice_line=0;
2637             }
2638
2639             s->mb_skipped=0;
2640             s->dquant=0; //only for QP_RD
2641
2642             update_mb_info(s, 0);
2643
2644             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2645                 int next_block=0;
2646                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2647
2648                 copy_context_before_encode(&backup_s, s, -1);
2649                 backup_s.pb= s->pb;
2650                 best_s.data_partitioning= s->data_partitioning;
2651                 best_s.partitioned_frame= s->partitioned_frame;
2652                 if(s->data_partitioning){
2653                     backup_s.pb2= s->pb2;
2654                     backup_s.tex_pb= s->tex_pb;
2655                 }
2656
2657                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2658                     s->mv_dir = MV_DIR_FORWARD;
2659                     s->mv_type = MV_TYPE_16X16;
2660                     s->mb_intra= 0;
2661                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2662                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2663                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2664                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2665                 }
2666                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2667                     s->mv_dir = MV_DIR_FORWARD;
2668                     s->mv_type = MV_TYPE_FIELD;
2669                     s->mb_intra= 0;
2670                     for(i=0; i<2; i++){
2671                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2672                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2673                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2674                     }
2675                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2676                                  &dmin, &next_block, 0, 0);
2677                 }
2678                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2679                     s->mv_dir = MV_DIR_FORWARD;
2680                     s->mv_type = MV_TYPE_16X16;
2681                     s->mb_intra= 0;
2682                     s->mv[0][0][0] = 0;
2683                     s->mv[0][0][1] = 0;
2684                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2685                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2686                 }
2687                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2688                     s->mv_dir = MV_DIR_FORWARD;
2689                     s->mv_type = MV_TYPE_8X8;
2690                     s->mb_intra= 0;
2691                     for(i=0; i<4; i++){
2692                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2693                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2694                     }
2695                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2696                                  &dmin, &next_block, 0, 0);
2697                 }
2698                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2699                     s->mv_dir = MV_DIR_FORWARD;
2700                     s->mv_type = MV_TYPE_16X16;
2701                     s->mb_intra= 0;
2702                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2703                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2704                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2705                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2706                 }
2707                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2708                     s->mv_dir = MV_DIR_BACKWARD;
2709                     s->mv_type = MV_TYPE_16X16;
2710                     s->mb_intra= 0;
2711                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2712                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2713                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2714                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2715                 }
2716                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2717                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2718                     s->mv_type = MV_TYPE_16X16;
2719                     s->mb_intra= 0;
2720                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2721                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2722                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2723                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2724                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2725                                  &dmin, &next_block, 0, 0);
2726                 }
2727                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2728                     s->mv_dir = MV_DIR_FORWARD;
2729                     s->mv_type = MV_TYPE_FIELD;
2730                     s->mb_intra= 0;
2731                     for(i=0; i<2; i++){
2732                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2733                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2734                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2735                     }
2736                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2737                                  &dmin, &next_block, 0, 0);
2738                 }
2739                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2740                     s->mv_dir = MV_DIR_BACKWARD;
2741                     s->mv_type = MV_TYPE_FIELD;
2742                     s->mb_intra= 0;
2743                     for(i=0; i<2; i++){
2744                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2745                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2746                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2747                     }
2748                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2749                                  &dmin, &next_block, 0, 0);
2750                 }
2751                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2752                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2753                     s->mv_type = MV_TYPE_FIELD;
2754                     s->mb_intra= 0;
2755                     for(dir=0; dir<2; dir++){
2756                         for(i=0; i<2; i++){
2757                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2758                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2759                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2760                         }
2761                     }
2762                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2763                                  &dmin, &next_block, 0, 0);
2764                 }
2765                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2766                     s->mv_dir = 0;
2767                     s->mv_type = MV_TYPE_16X16;
2768                     s->mb_intra= 1;
2769                     s->mv[0][0][0] = 0;
2770                     s->mv[0][0][1] = 0;
2771                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2772                                  &dmin, &next_block, 0, 0);
2773                     if(s->h263_pred || s->h263_aic){
2774                         if(best_s.mb_intra)
2775                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2776                         else
2777                             ff_clean_intra_table_entries(s); //old mode?
2778                     }
2779                 }
2780
2781                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2782                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2783                         const int last_qp= backup_s.qscale;
2784                         int qpi, qp, dc[6];
2785                         DCTELEM ac[6][16];
2786                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2787                         static const int dquant_tab[4]={-1,1,-2,2};
2788
2789                         av_assert2(backup_s.dquant == 0);
2790
2791                         //FIXME intra
2792                         s->mv_dir= best_s.mv_dir;
2793                         s->mv_type = MV_TYPE_16X16;
2794                         s->mb_intra= best_s.mb_intra;
2795                         s->mv[0][0][0] = best_s.mv[0][0][0];
2796                         s->mv[0][0][1] = best_s.mv[0][0][1];
2797                         s->mv[1][0][0] = best_s.mv[1][0][0];
2798                         s->mv[1][0][1] = best_s.mv[1][0][1];
2799
2800                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2801                         for(; qpi<4; qpi++){
2802                             int dquant= dquant_tab[qpi];
2803                             qp= last_qp + dquant;
2804                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2805                                 continue;
2806                             backup_s.dquant= dquant;
2807                             if(s->mb_intra && s->dc_val[0]){
2808                                 for(i=0; i<6; i++){
2809                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2810                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2811                                 }
2812                             }
2813
2814                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2815                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2816                             if(best_s.qscale != qp){
2817                                 if(s->mb_intra && s->dc_val[0]){
2818                                     for(i=0; i<6; i++){
2819                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2820                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2821                                     }
2822                                 }
2823                             }
2824                         }
2825                     }
2826                 }
2827                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2828                     int mx= s->b_direct_mv_table[xy][0];
2829                     int my= s->b_direct_mv_table[xy][1];
2830
2831                     backup_s.dquant = 0;
2832                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2833                     s->mb_intra= 0;
2834                     ff_mpeg4_set_direct_mv(s, mx, my);
2835                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2836                                  &dmin, &next_block, mx, my);
2837                 }
2838                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2839                     backup_s.dquant = 0;
2840                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2841                     s->mb_intra= 0;
2842                     ff_mpeg4_set_direct_mv(s, 0, 0);
2843                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2844                                  &dmin, &next_block, 0, 0);
2845                 }
2846                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2847                     int coded=0;
2848                     for(i=0; i<6; i++)
2849                         coded |= s->block_last_index[i];
2850                     if(coded){
2851                         int mx,my;
2852                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2853                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2854                             mx=my=0; //FIXME find the one we actually used
2855                             ff_mpeg4_set_direct_mv(s, mx, my);
2856                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2857                             mx= s->mv[1][0][0];
2858                             my= s->mv[1][0][1];
2859                         }else{
2860                             mx= s->mv[0][0][0];
2861                             my= s->mv[0][0][1];
2862                         }
2863
2864                         s->mv_dir= best_s.mv_dir;
2865                         s->mv_type = best_s.mv_type;
2866                         s->mb_intra= 0;
2867 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2868                         s->mv[0][0][1] = best_s.mv[0][0][1];
2869                         s->mv[1][0][0] = best_s.mv[1][0][0];
2870                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2871                         backup_s.dquant= 0;
2872                         s->skipdct=1;
2873                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2874                                         &dmin, &next_block, mx, my);
2875                         s->skipdct=0;
2876                     }
2877                 }
2878
2879                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2880
2881                 copy_context_after_encode(s, &best_s, -1);
2882
2883                 pb_bits_count= put_bits_count(&s->pb);
2884                 flush_put_bits(&s->pb);
2885                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2886                 s->pb= backup_s.pb;
2887
2888                 if(s->data_partitioning){
2889                     pb2_bits_count= put_bits_count(&s->pb2);
2890                     flush_put_bits(&s->pb2);
2891                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2892                     s->pb2= backup_s.pb2;
2893
2894                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2895                     flush_put_bits(&s->tex_pb);
2896                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2897                     s->tex_pb= backup_s.tex_pb;
2898                 }
2899                 s->last_bits= put_bits_count(&s->pb);
2900
2901                 if (CONFIG_H263_ENCODER &&
2902                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2903                     ff_h263_update_motion_val(s);
2904
2905                 if(next_block==0){ //FIXME 16 vs linesize16
2906                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2907                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2908                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2909                 }
2910
2911                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2912                     ff_MPV_decode_mb(s, s->block);
2913             } else {
2914                 int motion_x = 0, motion_y = 0;
2915                 s->mv_type=MV_TYPE_16X16;
2916                 // only one MB-Type possible
2917
2918                 switch(mb_type){
2919                 case CANDIDATE_MB_TYPE_INTRA:
2920                     s->mv_dir = 0;
2921                     s->mb_intra= 1;
2922                     motion_x= s->mv[0][0][0] = 0;
2923                     motion_y= s->mv[0][0][1] = 0;
2924                     break;
2925                 case CANDIDATE_MB_TYPE_INTER:
2926                     s->mv_dir = MV_DIR_FORWARD;
2927                     s->mb_intra= 0;
2928                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2929                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2930                     break;
2931                 case CANDIDATE_MB_TYPE_INTER_I:
2932                     s->mv_dir = MV_DIR_FORWARD;
2933                     s->mv_type = MV_TYPE_FIELD;
2934                     s->mb_intra= 0;
2935                     for(i=0; i<2; i++){
2936                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2937                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2938                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2939                     }
2940                     break;
2941                 case CANDIDATE_MB_TYPE_INTER4V:
2942                     s->mv_dir = MV_DIR_FORWARD;
2943                     s->mv_type = MV_TYPE_8X8;
2944                     s->mb_intra= 0;
2945                     for(i=0; i<4; i++){
2946                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2947                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2948                     }
2949                     break;
2950                 case CANDIDATE_MB_TYPE_DIRECT:
2951                     if (CONFIG_MPEG4_ENCODER) {
2952                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2953                         s->mb_intra= 0;
2954                         motion_x=s->b_direct_mv_table[xy][0];
2955                         motion_y=s->b_direct_mv_table[xy][1];
2956                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2957                     }
2958                     break;
2959                 case CANDIDATE_MB_TYPE_DIRECT0:
2960                     if (CONFIG_MPEG4_ENCODER) {
2961                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2962                         s->mb_intra= 0;
2963                         ff_mpeg4_set_direct_mv(s, 0, 0);
2964                     }
2965                     break;
2966                 case CANDIDATE_MB_TYPE_BIDIR:
2967                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2968                     s->mb_intra= 0;
2969                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2970                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2971                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2972                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2973                     break;
2974                 case CANDIDATE_MB_TYPE_BACKWARD:
2975                     s->mv_dir = MV_DIR_BACKWARD;
2976                     s->mb_intra= 0;
2977                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2978                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2979                     break;
2980                 case CANDIDATE_MB_TYPE_FORWARD:
2981                     s->mv_dir = MV_DIR_FORWARD;
2982                     s->mb_intra= 0;
2983                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2984                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2985 //                    printf(" %d %d ", motion_x, motion_y);
2986                     break;
2987                 case CANDIDATE_MB_TYPE_FORWARD_I:
2988                     s->mv_dir = MV_DIR_FORWARD;
2989                     s->mv_type = MV_TYPE_FIELD;
2990                     s->mb_intra= 0;
2991                     for(i=0; i<2; i++){
2992                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2993                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2994                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2995                     }
2996                     break;
2997                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2998                     s->mv_dir = MV_DIR_BACKWARD;
2999                     s->mv_type = MV_TYPE_FIELD;
3000                     s->mb_intra= 0;
3001                     for(i=0; i<2; i++){
3002                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3003                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3004                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3005                     }
3006                     break;
3007                 case CANDIDATE_MB_TYPE_BIDIR_I:
3008                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3009                     s->mv_type = MV_TYPE_FIELD;
3010                     s->mb_intra= 0;
3011                     for(dir=0; dir<2; dir++){
3012                         for(i=0; i<2; i++){
3013                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3014                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3015                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3016                         }
3017                     }
3018                     break;
3019                 default:
3020                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3021                 }
3022
3023                 encode_mb(s, motion_x, motion_y);
3024
3025                 // RAL: Update last macroblock type
3026                 s->last_mv_dir = s->mv_dir;
3027
3028                 if (CONFIG_H263_ENCODER &&
3029                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3030                     ff_h263_update_motion_val(s);
3031
3032                 ff_MPV_decode_mb(s, s->block);
3033             }
3034
3035             /* clean the MV table in IPS frames for direct mode in B frames */
3036             if(s->mb_intra /* && I,P,S_TYPE */){
3037                 s->p_mv_table[xy][0]=0;
3038                 s->p_mv_table[xy][1]=0;
3039             }
3040
3041             if(s->flags&CODEC_FLAG_PSNR){
3042                 int w= 16;
3043                 int h= 16;
3044
3045                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3046                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3047
3048                 s->current_picture.f.error[0] += sse(
3049                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3050                     s->dest[0], w, h, s->linesize);
3051                 s->current_picture.f.error[1] += sse(
3052                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3053                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3054                 s->current_picture.f.error[2] += sse(
3055                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3056                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3057             }
3058             if(s->loop_filter){
3059                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3060                     ff_h263_loop_filter(s);
3061             }
3062 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
3063         }
3064     }
3065
3066     //not beautiful here but we must write it before flushing so it has to be here
3067     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3068         ff_msmpeg4_encode_ext_header(s);
3069
3070     write_slice_end(s);
3071
3072     /* Send the last GOB if RTP */
3073     if (s->avctx->rtp_callback) {
3074         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3075         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3076         /* Call the RTP callback to send the last GOB */
3077         emms_c();
3078         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3079     }
3080
3081     return 0;
3082 }
3083
3084 #define MERGE(field) dst->field += src->field; src->field=0
3085 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3086     MERGE(me.scene_change_score);
3087     MERGE(me.mc_mb_var_sum_temp);
3088     MERGE(me.mb_var_sum_temp);
3089 }
3090
3091 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3092     int i;
3093
3094     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3095     MERGE(dct_count[1]);
3096     MERGE(mv_bits);
3097     MERGE(i_tex_bits);
3098     MERGE(p_tex_bits);
3099     MERGE(i_count);
3100     MERGE(f_count);
3101     MERGE(b_count);
3102     MERGE(skip_count);
3103     MERGE(misc_bits);
3104     MERGE(error_count);
3105     MERGE(padding_bug_score);
3106     MERGE(current_picture.f.error[0]);
3107     MERGE(current_picture.f.error[1]);
3108     MERGE(current_picture.f.error[2]);
3109
3110     if(dst->avctx->noise_reduction){
3111         for(i=0; i<64; i++){
3112             MERGE(dct_error_sum[0][i]);
3113             MERGE(dct_error_sum[1][i]);
3114         }
3115     }
3116
3117     assert(put_bits_count(&src->pb) % 8 ==0);
3118     assert(put_bits_count(&dst->pb) % 8 ==0);
3119     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3120     flush_put_bits(&dst->pb);
3121 }
3122
3123 static int estimate_qp(MpegEncContext *s, int dry_run){
3124     if (s->next_lambda){
3125         s->current_picture_ptr->f.quality =
3126         s->current_picture.f.quality = s->next_lambda;
3127         if(!dry_run) s->next_lambda= 0;
3128     } else if (!s->fixed_qscale) {
3129         s->current_picture_ptr->f.quality =
3130         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3131         if (s->current_picture.f.quality < 0)
3132             return -1;
3133     }
3134
3135     if(s->adaptive_quant){
3136         switch(s->codec_id){
3137         case AV_CODEC_ID_MPEG4:
3138             if (CONFIG_MPEG4_ENCODER)
3139                 ff_clean_mpeg4_qscales(s);
3140             break;
3141         case AV_CODEC_ID_H263:
3142         case AV_CODEC_ID_H263P:
3143         case AV_CODEC_ID_FLV1:
3144             if (CONFIG_H263_ENCODER)
3145                 ff_clean_h263_qscales(s);
3146             break;
3147         default:
3148             ff_init_qscale_tab(s);
3149         }
3150
3151         s->lambda= s->lambda_table[0];
3152         //FIXME broken
3153     }else
3154         s->lambda = s->current_picture.f.quality;
3155 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3156     update_qscale(s);
3157     return 0;
3158 }
3159
3160 /* must be called before writing the header */
3161 static void set_frame_distances(MpegEncContext * s){
3162     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3163     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3164
3165     if(s->pict_type==AV_PICTURE_TYPE_B){
3166         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3167         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3168     }else{
3169         s->pp_time= s->time - s->last_non_b_time;
3170         s->last_non_b_time= s->time;
3171         assert(s->picture_number==0 || s->pp_time > 0);
3172     }
3173 }
3174
/**
 * Encode the current picture into s->pb.
 *
 * The steps, in order: reset per-picture statistics, set up time/rounding
 * state, pick a provisional lambda, run motion estimation (or the intra
 * variance pass for I pictures) on all slice contexts, possibly promote a P
 * picture to I on a scene change, choose f_code/b_code and clip over-long
 * motion vectors, estimate the final quantizer, write the format specific
 * picture header, and finally run encode_thread on all slice contexts and
 * merge their output.
 *
 * @param s              encoder context
 * @param picture_number stored in s->picture_number and handed to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails
 */
3175 static int encode_picture(MpegEncContext *s, int picture_number)
3176 {
3177     int i;
3178     int bits;
3179     int context_count = s->slice_context_count;
3180
3181     s->picture_number = picture_number;
3182
3183     /* Reset the average MB variance */
3184     s->me.mb_var_sum_temp    =
3185     s->me.mc_mb_var_sum_temp = 0;
3186
3187     /* we need to initialize some time vars before we can encode b-frames */
3188     // RAL: Condition added for MPEG1VIDEO
3189     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3190         set_frame_distances(s);
3191     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3192         ff_set_mpeg4_time(s);
3193
3194     s->me.scene_change_score=0;
3195
3196 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3197
         /* I pictures set no_rounding from msmpeg4_version; other non-B
          * pictures toggle it when flipflop_rounding is set or the codec is
          * H263P or MPEG4. B pictures leave it untouched. */
3198     if(s->pict_type==AV_PICTURE_TYPE_I){
3199         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3200         else                        s->no_rounding=0;
3201     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3202         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3203             s->no_rounding ^= 1;
3204     }
3205
         /* Provisional lambda for motion estimation: a dry-run estimate in
          * the second pass, otherwise (unless CODEC_FLAG_QSCALE is set) the
          * lambda remembered for the matching picture type. */
3206     if(s->flags & CODEC_FLAG_PASS2){
3207         if (estimate_qp(s,1) < 0)
3208             return -1;
3209         ff_get_2pass_fcode(s);
3210     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3211         if(s->pict_type==AV_PICTURE_TYPE_B)
3212             s->lambda= s->last_lambda_for[s->pict_type];
3213         else
3214             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3215         update_qscale(s);
3216     }
3217
         /* For every codec except AMV the chroma intra quantizer tables are
          * aliases of the luma ones; chroma tables that are distinct from
          * the luma ones are freed before being replaced by the alias. */
3218     if(s->codec_id != AV_CODEC_ID_AMV){
3219         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3220         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3221         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3222         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3223     }
3224
3225     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* NOTE(review): the per-context loops start at 1, presumably because
          * thread_context[0] is s itself -- confirm. */
3226     for(i=1; i<context_count; i++){
3227         ff_update_duplicate_context(s->thread_context[i], s);
3228     }
3229
3230     if(ff_init_me(s)<0)
3231         return -1;
3232
3233     /* Estimate motion for every MB */
3234     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda/lambda2 by me_penalty_compensation/256, rounded */
3235         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3236         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
             /* optional pre-pass: only for non-B pictures with me_threshold 0,
              * when pre_me is 2, or pre_me is nonzero and the previous non-B
              * picture was an I picture */
3237         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3238             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3239                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3240             }
3241         }
3242
3243         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3244     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3245         /* I-Frame */
3246         for(i=0; i<s->mb_stride*s->mb_height; i++)
3247             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3248
3249         if(!s->fixed_qscale){
3250             /* finding spatial complexity for I-frame rate control */
3251             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3252         }
3253     }
         /* gather the statistics produced by the other slice contexts */
3254     for(i=1; i<context_count; i++){
3255         merge_context_after_me(s, s->thread_context[i]);
3256     }
3257     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3258     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3259     emms_c();
3260
         /* Scene change: a P picture whose score exceeds the threshold is
          * re-typed as an I picture and every macroblock is forced intra. */
3261     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3262         s->pict_type= AV_PICTURE_TYPE_I;
3263         for(i=0; i<s->mb_stride*s->mb_height; i++)
3264             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3265 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3266         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3267     }
3268
         /* Choose f_code/b_code from the estimated vectors and let
          * ff_fix_long_mvs() deal with vectors that do not fit the chosen
          * code. Skipped entirely when umvplus is set. */
3269     if(!s->umvplus){
3270         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3271             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3272
3273             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3274                 int a,b;
3275                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3276                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3277                 s->f_code= FFMAX3(s->f_code, a, b);
3278             }
3279
3280             ff_fix_long_p_mvs(s);
3281             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3282             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3283                 int j;
3284                 for(i=0; i<2; i++){
3285                     for(j=0; j<2; j++)
3286                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3287                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3288                 }
3289             }
3290         }
3291
3292         if(s->pict_type==AV_PICTURE_TYPE_B){
3293             int a, b;
3294
                 /* f_code covers the forward and bidir-forward tables,
                  * b_code the backward and bidir-backward tables */
3295             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3296             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3297             s->f_code = FFMAX(a, b);
3298
3299             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3300             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3301             s->b_code = FFMAX(a, b);
3302
3303             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3304             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3305             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3306             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3307             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3308                 int dir, j;
3309                 for(dir=0; dir<2; dir++){
3310                     for(i=0; i<2; i++){
3311                         for(j=0; j<2; j++){
3312                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3313                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3314                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3315                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3316                         }
3317                     }
3318                 }
3319             }
3320         }
3321     }
3322
         /* final quantizer decision for this picture (not a dry run) */
3323     if (estimate_qp(s, 0) < 0)
3324         return -1;
3325
3326     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3327         s->qscale= 3; //reduce clipping problems
3328
3329     if (s->out_format == FMT_MJPEG) {
3330         /* for mjpeg, we do include qscale in the matrix */
3331         for(i=1;i<64;i++){
3332             int j= s->dsp.idct_permutation[i];
3333
3334             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3335         }
3336         s->y_dc_scale_table=
3337         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3338         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3339         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3340                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrix, so it is pinned to 8 */
3341         s->qscale= 8;
3342     }
3343     if(s->codec_id == AV_CODEC_ID_AMV){
             /* AMV: constant DC scale tables (13 for luma, 14 for chroma) and
              * fixed quantizer matrices taken from sp5x_quant_table */
3344         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3345         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3346         for(i=1;i<64;i++){
3347             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3348
3349             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3350             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3351         }
3352         s->y_dc_scale_table= y;
3353         s->c_dc_scale_table= c;
3354         s->intra_matrix[0] = 13;
3355         s->chroma_intra_matrix[0] = 14;
3356         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3357                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3358         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3359                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3360         s->qscale= 8;
3361     }
3362
3363     //FIXME var duplication
3364     s->current_picture_ptr->f.key_frame =
3365     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3366     s->current_picture_ptr->f.pict_type =
3367     s->current_picture.f.pict_type = s->pict_type;
3368
3369     if (s->current_picture.f.key_frame)
3370         s->picture_in_gop_number=0;
3371
         /* Write the picture header of the output format; last_bits marks the
          * bit position before the header so header_bits can be derived. */
3372     s->mb_x = s->mb_y = 0;
3373     s->last_bits= put_bits_count(&s->pb);
3374     switch(s->out_format) {
3375     case FMT_MJPEG:
3376         if (CONFIG_MJPEG_ENCODER)
3377             ff_mjpeg_encode_picture_header(s);
3378         break;
3379     case FMT_H261:
3380         if (CONFIG_H261_ENCODER)
3381             ff_h261_encode_picture_header(s, picture_number);
3382         break;
3383     case FMT_H263:
             /* FMT_H263 is shared by several codecs; the first matching
              * condition picks the header writer */
3384         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3385             ff_wmv2_encode_picture_header(s, picture_number);
3386         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3387             ff_msmpeg4_encode_picture_header(s, picture_number);
3388         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3389             ff_mpeg4_encode_picture_header(s, picture_number);
3390         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3391             ff_rv10_encode_picture_header(s, picture_number);
3392         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3393             ff_rv20_encode_picture_header(s, picture_number);
3394         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3395             ff_flv_encode_picture_header(s, picture_number);
3396         else if (CONFIG_H263_ENCODER)
3397             ff_h263_encode_picture_header(s, picture_number);
3398         break;
3399     case FMT_MPEG1:
3400         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3401             ff_mpeg1_encode_picture_header(s, picture_number);
3402         break;
3403     case FMT_H264:
             /* no picture header is written here for this format */
3404         break;
3405     default:
3406         av_assert0(0);
3407     }
3408     bits= put_bits_count(&s->pb);
3409     s->header_bits= bits - s->last_bits;
3410
         /* Propagate the decisions made above to the other slice contexts,
          * encode all slices, then merge statistics and bitstreams back.
          * NOTE(review): the return values of execute() are not checked. */
3411     for(i=1; i<context_count; i++){
3412         update_duplicate_context_after_me(s->thread_context[i], s);
3413     }
3414     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3415     for(i=1; i<context_count; i++){
3416         merge_context_after_encode(s, s->thread_context[i]);
3417     }
3418     emms_c();
3419     return 0;
3420 }
3421
3422 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3423     const int intra= s->mb_intra;
3424     int i;
3425
3426     s->dct_count[intra]++;
3427
3428     for(i=0; i<64; i++){
3429         int level= block[i];
3430
3431         if(level){
3432             if(level>0){
3433                 s->dct_error_sum[intra][i] += level;
3434                 level -= s->dct_offset[intra][i];
3435                 if(level<0) level=0;
3436             }else{
3437                 s->dct_error_sum[intra][i] -= level;
3438                 level += s->dct_offset[intra][i];
3439                 if(level>0) level=0;
3440             }
3441             block[i]= level;
3442         }
3443     }
3444 }
3445
/**
 * Transform and quantize one block, choosing the coded levels by a
 * rate-distortion search instead of plain rounding.
 *
 * After the forward DCT (and optional denoising) every coefficient gets up
 * to two candidate levels. Walking the scan order, the function keeps for
 * every position the cheapest way (distortion + lambda * bits) to code all
 * coefficients before it, trying each candidate level combined with each
 * surviving run start. The winning chain of (run, level) pairs is written
 * back into block[].
 *
 * @param s        encoder context
 * @param block    64 coefficients; overwritten with the quantized levels
 * @param n        block number; n < 4 selects the luma DC scale and intra
 *                 matrix, otherwise the chroma ones; also indexes
 *                 s->coded_score[]
 * @param qscale   quantizer scale, indexes the q_*_matrix tables
 * @param overflow set to nonzero if a level may exceed s->max_qcoeff
 * @return scan index of the last nonzero coefficient; a value below the
 *         first AC index (0 for intra, -1 for inter) means no coefficient
 *         besides the intra DC is coded
 */
3446 static int dct_quantize_trellis_c(MpegEncContext *s,
3447                                   DCTELEM *block, int n,
3448                                   int qscale, int *overflow){
3449     const int *qmat;
3450     const uint8_t *scantable= s->intra_scantable.scantable;
3451     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3452     int max=0;
3453     unsigned int threshold1, threshold2;
3454     int bias=0;
         /* run_tab/level_tab/score_tab are indexed by "position after the
          * coefficient", i.e. entry i+1 describes the best choice for scan
          * position i */
3455     int run_tab[65];
3456     int level_tab[65];
3457     int score_tab[65];
3458     int survivor[65];
3459     int survivor_count;
3460     int last_run=0;
3461     int last_level=0;
3462     int last_score= 0;
3463     int last_i;
         /* coeff[k][i]: k-th candidate level at scan position i,
          * coeff_count[i]: number of candidates (1 or 2) */
3464     int coeff[2][64];
3465     int coeff_count[64];
3466     int qmul, qadd, start_i, last_non_zero, i, dc;
3467     const int esc_length= s->ac_esc_length;
3468     uint8_t * length;
3469     uint8_t * last_length;
3470     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3471
3472     s->dsp.fdct (block);
3473
3474     if(s->dct_error_sum)
3475         s->denoise_dct(s, block);
         /* dequantizer constants for the FMT_H263 path, 8 times the values
          * used by dct_quantize_refine() (qscale*2 and (qscale-1)|1) */
3476     qmul= qscale*16;
3477     qadd= ((qscale-1)|1)*8;
3478
3479     if (s->mb_intra) {
3480         int q;
3481         if (!s->h263_aic) {
3482             if (n < 4)
3483                 q = s->y_dc_scale;
3484             else
3485                 q = s->c_dc_scale;
3486             q = q << 3;
3487         } else{
3488             /* For AIC we skip quant/dequant of INTRADC */
3489             q = 1 << 3;
3490             qadd=0;
3491         }
3492
3493         /* note: block[0] is assumed to be positive */
3494         block[0] = (block[0] + (q >> 1)) / q;
             /* the DC coefficient is done; the search covers AC only */
3495         start_i = 1;
3496         last_non_zero = 0;
3497         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3498         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3499             bias= 1<<(QMAT_SHIFT-1);
3500         length     = s->intra_ac_vlc_length;
3501         last_length= s->intra_ac_vlc_last_length;
3502     } else {
3503         start_i = 0;
3504         last_non_zero = -1;
3505         qmat = s->q_inter_matrix[qscale];
3506         length     = s->inter_ac_vlc_length;
3507         last_length= s->inter_ac_vlc_last_length;
3508     }
3509     last_i= start_i;
3510
         /* (unsigned)(level + threshold1) > threshold2 is a single-compare
          * test for |level| > threshold1, i.e. the coefficient quantizes to
          * a nonzero level */
3511     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3512     threshold2= (threshold1<<1);
3513
         /* find the last coefficient in scan order that quantizes to nonzero */
3514     for(i=63; i>=start_i; i--) {
3515         const int j = scantable[i];
3516         int level = block[j] * qmat[j];
3517
3518         if(((unsigned)(level+threshold1))>threshold2){
3519             last_non_zero = i;
3520             break;
3521         }
3522     }
3523
         /* build the candidate levels for every position up to that one */
3524     for(i=start_i; i<=last_non_zero; i++) {
3525         const int j = scantable[i];
3526         int level = block[j] * qmat[j];
3527
3528 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3529 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3530         if(((unsigned)(level+threshold1))>threshold2){
                 /* candidates: the quantized level and the one with a
                  * magnitude smaller by 1; from here on level holds the
                  * magnitude */
3531             if(level>0){
3532                 level= (bias + level)>>QMAT_SHIFT;
3533                 coeff[0][i]= level;
3534                 coeff[1][i]= level-1;
3535 //                coeff[2][k]= level-2;
3536             }else{
3537                 level= (bias - level)>>QMAT_SHIFT;
3538                 coeff[0][i]= -level;
3539                 coeff[1][i]= -level+1;
3540 //                coeff[2][k]= -level+2;
3541             }
                 /* a magnitude of 1 has only one candidate, since the second
                  * one would be 0 */
3542             coeff_count[i]= FFMIN(level, 2);
3543             av_assert2(coeff_count[i]);
3544             max |=level;
3545         }else{
                 /* would quantize to 0: offer +1 or -1 (sign of level) as the
                  * only candidate */
3546             coeff[0][i]= (level>>31)|1;
3547             coeff_count[i]= 1;
3548         }
3549     }
3550
3551     *overflow= s->max_qcoeff < max; //overflow might have happened
3552
         /* nothing quantizes to nonzero: clear the searched range and stop */
3553     if(last_non_zero < start_i){
3554         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3555         return last_non_zero;
3556     }
3557
3558     score_tab[start_i]= 0;
3559     survivor[0]= start_i;
3560     survivor_count= 1;
3561
         /* main search: score_tab[i+1] becomes the best score with a nonzero
          * level at position i; survivor[] lists the positions a run may
          * start from */
3562     for(i=start_i; i<=last_non_zero; i++){
3563         int level_index, j, zero_distortion;
3564         int dct_coeff= FFABS(block[ scantable[i] ]);
3565         int best_score=256*256*256*120;
3566
             /* ff_fdct_ifast output is rescaled with ff_inv_aanscales */
3567         if (s->dsp.fdct == ff_fdct_ifast)
3568             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3569         zero_distortion= dct_coeff*dct_coeff;
3570
3571         for(level_index=0; level_index < coeff_count[i]; level_index++){
3572             int distortion;
3573             int level= coeff[level_index][i];
3574             const int alevel= FFABS(level);
3575             int unquant_coeff;
3576
3577             av_assert2(level);
3578
                 /* reconstruct the magnitude a decoder would see */
3579             if(s->out_format == FMT_H263){
3580                 unquant_coeff= alevel*qmul + qadd;
3581             }else{ //MPEG1
3582                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
                     /* NOTE(review): s->intra_matrix is used here even for
                      * n >= 4 although qmat was taken from
                      * q_chroma_intra_matrix -- confirm this is intended */
3583                 if(s->mb_intra){
3584                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3585                         unquant_coeff =   (unquant_coeff - 1) | 1;
3586                 }else{
3587                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3588                         unquant_coeff =   (unquant_coeff - 1) | 1;
3589                 }
3590                 unquant_coeff<<= 3;
3591             }
3592
                 /* distortion is measured relative to coding a zero here */
3593             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* shift the level by 64: values 0..127 index the VLC length
                  * tables, anything else is costed as an escape */
3594             level+=64;
3595             if((level&(~127)) == 0){
3596                 for(j=survivor_count-1; j>=0; j--){
3597                     int run= i - survivor[j];
3598                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3599                     score += score_tab[i-run];
3600
3601                     if(score < best_score){
3602                         best_score= score;
3603                         run_tab[i+1]= run;
3604                         level_tab[i+1]= level-64;
3605                     }
3606                 }
3607
                     /* FMT_H263 uses a separate length table for the last
                      * coefficient, so the best end point is tracked here;
                      * last_score starts at 0, so an end point is only taken
                      * if it beats coding nothing */
3608                 if(s->out_format == FMT_H263){
3609                     for(j=survivor_count-1; j>=0; j--){
3610                         int run= i - survivor[j];
3611                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3612                         score += score_tab[i-run];
3613                         if(score < last_score){
3614                             last_score= score;
3615                             last_run= run;
3616                             last_level= level-64;
3617                             last_i= i+1;
3618                         }
3619                     }
3620                 }
3621             }else{
                     /* escape coded: fixed cost, independent of the run */
3622                 distortion += esc_length*lambda;
3623                 for(j=survivor_count-1; j>=0; j--){
3624                     int run= i - survivor[j];
3625                     int score= distortion + score_tab[i-run];
3626
3627                     if(score < best_score){
3628                         best_score= score;
3629                         run_tab[i+1]= run;
3630                         level_tab[i+1]= level-64;
3631                     }
3632                 }
3633
3634                 if(s->out_format == FMT_H263){
3635                   for(j=survivor_count-1; j>=0; j--){
3636                         int run= i - survivor[j];
3637                         int score= distortion + score_tab[i-run];
3638                         if(score < last_score){
3639                             last_score= score;
3640                             last_run= run;
3641                             last_level= level-64;
3642                             last_i= i+1;
3643                         }
3644                     }
3645                 }
3646             }
3647         }
3648
3649         score_tab[i+1]= best_score;
3650
             /* drop survivors whose score is worse than the new best; for
              * longer blocks a margin of lambda is allowed (see note) */
3651         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3652         if(last_non_zero <= 27){
3653             for(; survivor_count; survivor_count--){
3654                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3655                     break;
3656             }
3657         }else{
3658             for(; survivor_count; survivor_count--){
3659                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3660                     break;
3661             }
3662         }
3663
3664         survivor[ survivor_count++ ]= i+1;
3665     }
3666
         /* other formats have no separate "last" table: pick the end point
          * afterwards, adding 2*lambda for every end point but position 0 */
3667     if(s->out_format != FMT_H263){
3668         last_score= 256*256*256*120;
3669         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3670             int score= score_tab[i];
3671             if(i) score += lambda*2; //FIXME exacter?
3672
3673             if(score < last_score){
3674                 last_score= score;
3675                 last_i= i;
3676                 last_level= level_tab[i];
3677                 last_run= run_tab[i];
3678             }
3679         }
3680     }
3681
3682     s->coded_score[n] = last_score;
3683
         /* remember |block[0]| before the AC range (or, for inter blocks,
          * the whole block) is cleared */
3684     dc= FFABS(block[0]);
3685     last_non_zero= last_i - 1;
3686     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3687
3688     if(last_non_zero < start_i)
3689         return last_non_zero;
3690
         /* inter block where only the first coefficient survives: decide
          * between its candidates and dropping it (return -1) */
3691     if(last_non_zero == 0 && start_i == 0){
3692         int best_level= 0;
3693         int best_score= dc * dc;
3694
3695         for(i=0; i<coeff_count[0]; i++){
3696             int level= coeff[i][0];
3697             int alevel= FFABS(level);
3698             int unquant_coeff, score, distortion;
3699
3700             if(s->out_format == FMT_H263){
3701                     unquant_coeff= (alevel*qmul + qadd)>>3;
3702             }else{ //MPEG1
3703                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3704                     unquant_coeff =   (unquant_coeff - 1) | 1;
3705             }
3706             unquant_coeff = (unquant_coeff + 4) >> 3;
3707             unquant_coeff<<= 3 + 3;
3708
3709             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3710             level+=64;
3711             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3712             else                    score= distortion + esc_length*lambda;
3713
3714             if(score < best_score){
3715                 best_score= score;
3716                 best_level= level - 64;
3717             }
3718         }
3719         block[0]= best_level;
3720         s->coded_score[n] = best_score - dc*dc;
3721         if(best_level == 0) return -1;
3722         else                return last_non_zero;
3723     }
3724
         /* walk the chosen chain backwards via run_tab and store the levels
          * at their permuted positions */
3725     i= last_i;
3726     av_assert2(last_level);
3727
3728     block[ perm_scantable[last_non_zero] ]= last_level;
3729     i -= last_run + 1;
3730
3731     for(; i>start_i; i -= run_tab[i] + 1){
3732         block[ perm_scantable[i-1] ]= level_tab[i];
3733     }
3734
3735     return last_non_zero;
3736 }
3737
3738 //#define REFINE_STATS 1
3739 static int16_t basis[64][64];
3740
3741 static void build_basis(uint8_t *perm){
3742     int i, j, x, y;
3743     emms_c();
3744     for(i=0; i<8; i++){
3745         for(j=0; j<8; j++){
3746             for(y=0; y<8; y++){
3747                 for(x=0; x<8; x++){
3748                     double s= 0.25*(1<<BASIS_SHIFT);
3749                     int index= 8*i + j;
3750                     int perm_index= perm[index];
3751                     if(i==0) s*= sqrt(0.5);
3752                     if(j==0) s*= sqrt(0.5);
3753                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3754                 }
3755             }
3756         }
3757     }
3758 }
3759
3760 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3761                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3762                         int n, int qscale){
3763     int16_t rem[64];
3764     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3765     const uint8_t *scantable= s->intra_scantable.scantable;
3766     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3767 //    unsigned int threshold1, threshold2;
3768 //    int bias=0;
3769     int run_tab[65];
3770     int prev_run=0;
3771     int prev_level=0;
3772     int qmul, qadd, start_i, last_non_zero, i, dc;
3773     uint8_t * length;
3774     uint8_t * last_length;
3775     int lambda;
3776     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3777 #ifdef REFINE_STATS
3778 static int count=0;
3779 static int after_last=0;
3780 static int to_zero=0;
3781 static int from_zero=0;
3782 static int raise=0;
3783 static int lower=0;
3784 static int messed_sign=0;
3785 #endif
3786
3787     if(basis[0][0] == 0)
3788         build_basis(s->dsp.idct_permutation);
3789
3790     qmul= qscale*2;
3791     qadd= (qscale-1)|1;
3792     if (s->mb_intra) {
3793         if (!s->h263_aic) {
3794             if (n < 4)
3795                 q = s->y_dc_scale;
3796             else
3797                 q = s->c_dc_scale;
3798         } else{
3799             /* For AIC we skip quant/dequant of INTRADC */
3800             q = 1;
3801             qadd=0;
3802         }
3803         q <<= RECON_SHIFT-3;
3804         /* note: block[0] is assumed to be positive */
3805         dc= block[0]*q;
3806 //        block[0] = (block[0] + (q >> 1)) / q;
3807         start_i = 1;
3808 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3809 //            bias= 1<<(QMAT_SHIFT-1);
3810         length     = s->intra_ac_vlc_length;
3811         last_length= s->intra_ac_vlc_last_length;
3812     } else {
3813         dc= 0;
3814         start_i = 0;
3815         length     = s->inter_ac_vlc_length;
3816         last_length= s->inter_ac_vlc_last_length;
3817     }
3818     last_non_zero = s->block_last_index[n];
3819
3820 #ifdef REFINE_STATS
3821 {START_TIMER
3822 #endif
3823     dc += (1<<(RECON_SHIFT-1));
3824     for(i=0; i<64; i++){
3825         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3826     }
3827 #ifdef REFINE_STATS
3828 STOP_TIMER("memset rem[]")}
3829 #endif
3830     sum=0;
3831     for(i=0; i<64; i++){
3832         int one= 36;
3833         int qns=4;
3834         int w;
3835
3836         w= FFABS(weight[i]) + qns*one;
3837         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3838
3839         weight[i] = w;
3840 //        w=weight[i] = (63*qns + (w/2)) / w;
3841
3842         av_assert2(w>0);
3843         av_assert2(w<(1<<6));
3844         sum += w*w;
3845     }
3846     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3847 #ifdef REFINE_STATS
3848 {START_TIMER
3849 #endif
3850     run=0;
3851     rle_index=0;
3852     for(i=start_i; i<=last_non_zero; i++){
3853         int j= perm_scantable[i];
3854         const int level= block[j];
3855         int coeff;
3856
3857         if(level){
3858             if(level<0) coeff= qmul*level - qadd;
3859             else        coeff= qmul*level + qadd;
3860             run_tab[rle_index++]=run;
3861             run=0;
3862
3863             s->dsp.add_8x8basis(rem, basis[j], coeff);
3864         }else{
3865             run++;
3866         }
3867     }
3868 #ifdef REFINE_STATS
3869 if(last_non_zero>0){
3870 STOP_TIMER("init rem[]")
3871 }
3872 }
3873
3874 {START_TIMER
3875 #endif
3876     for(;;){
3877         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3878         int best_coeff=0;
3879         int best_change=0;
3880         int run2, best_unquant_change=0, analyze_gradient;
3881 #ifdef REFINE_STATS
3882 {START_TIMER
3883 #endif
3884         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3885
3886         if(analyze_gradient){
3887 #ifdef REFINE_STATS
3888 {START_TIMER
3889 #endif
3890             for(i=0; i<64; i++){
3891                 int w= weight[i];
3892
3893                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3894             }
3895 #ifdef REFINE_STATS
3896 STOP_TIMER("rem*w*w")}
3897 {START_TIMER
3898 #endif
3899             s->dsp.fdct(d1);
3900 #ifdef REFINE_STATS
3901 STOP_TIMER("dct")}
3902 #endif
3903         }
3904
3905         if(start_i){
3906             const int level= block[0];
3907             int change, old_coeff;
3908
3909             av_assert2(s->mb_intra);
3910
3911             old_coeff= q*level;
3912
3913             for(change=-1; change<=1; change+=2){
3914                 int new_level= level + change;
3915                 int score, new_coeff;
3916
3917                 new_coeff= q*new_level;
3918                 if(new_coeff >= 2048 || new_coeff < 0)
3919                     continue;
3920
3921                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3922                 if(score<best_score){
3923                     best_score= score;
3924                     best_coeff= 0;
3925                     best_change= change;
3926                     best_unquant_change= new_coeff - old_coeff;
3927                 }
3928             }
3929         }
3930
3931         run=0;
3932         rle_index=0;
3933         run2= run_tab[rle_index++];
3934         prev_level=0;
3935         prev_run=0;
3936
3937         for(i=start_i; i<64; i++){
3938             int j= perm_scantable[i];
3939             const int level= block[j];
3940             int change, old_coeff;
3941
3942             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3943                 break;
3944
3945             if(level){
3946                 if(level<0) old_coeff= qmul*level - qadd;
3947                 else        old_coeff= qmul*level + qadd;
3948                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3949             }else{
3950                 old_coeff=0;
3951                 run2--;
3952                 av_assert2(run2>=0 || i >= last_non_zero );
3953             }
3954
3955             for(change=-1; change<=1; change+=2){
3956                 int new_level= level + change;
3957                 int score, new_coeff, unquant_change;
3958
3959                 score=0;
3960                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3961                    continue;
3962
3963                 if(new_level){
3964                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3965                     else            new_coeff= qmul*new_level + qadd;
3966                     if(new_coeff >= 2048 || new_coeff <= -2048)
3967                         continue;
3968                     //FIXME check for overflow
3969
3970                     if(level){
3971                         if(level < 63 && level > -63){
3972                             if(i < last_non_zero)
3973                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3974                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3975                             else
3976                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3977                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3978                         }
3979                     }else{
3980                         av_assert2(FFABS(new_level)==1);
3981
3982                         if(analyze_gradient){
3983                             int g= d1[ scantable[i] ];
3984                             if(g && (g^new_level) >= 0)
3985                                 continue;
3986                         }
3987
3988                         if(i < last_non_zero){
3989                             int next_i= i + run2 + 1;
3990                             int next_level= block[ perm_scantable[next_i] ] + 64;
3991
3992                             if(next_level&(~127))
3993                                 next_level= 0;
3994
3995                             if(next_i < last_non_zero)
3996                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3997                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3998                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3999                             else
4000                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4001                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4002                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4003                         }else{
4004                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4005                             if(prev_level){
4006                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4007                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4008                             }
4009                         }
4010                     }
4011                 }else{
4012                     new_coeff=0;
4013                     av_assert2(FFABS(level)==1);
4014
4015                     if(i < last_non_zero){
4016                         int next_i= i + run2 + 1;
4017                         int next_level= block[ perm_scantable[next_i] ] + 64;
4018
4019                         if(next_level&(~127))
4020                             next_level= 0;
4021
4022                         if(next_i < last_non_zero)
4023                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4024                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4025                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4026                         else
4027                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4028                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4029                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4030                     }else{
4031                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4032                         if(prev_level){
4033                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4034                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4035                         }
4036                     }
4037                 }
4038
4039                 score *= lambda;
4040
4041                 unquant_change= new_coeff - old_coeff;
4042                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4043
4044                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4045                 if(score<best_score){
4046                     best_score= score;
4047                     best_coeff= i;
4048                     best_change= change;
4049                     best_unquant_change= unquant_change;
4050                 }
4051             }
4052             if(level){
4053                 prev_level= level + 64;
4054                 if(prev_level&(~127))
4055                     prev_level= 0;
4056                 prev_run= run;
4057                 run=0;
4058             }else{
4059                 run++;
4060             }
4061         }
4062 #ifdef REFINE_STATS
4063 STOP_TIMER("iterative step")}
4064 #endif
4065
4066         if(best_change){
4067             int j= perm_scantable[ best_coeff ];
4068
4069             block[j] += best_change;
4070
4071             if(best_coeff > last_non_zero){
4072                 last_non_zero= best_coeff;
4073                 av_assert2(block[j]);
4074 #ifdef REFINE_STATS
4075 after_last++;
4076 #endif
4077             }else{
4078 #ifdef REFINE_STATS
4079 if(block[j]){
4080     if(block[j] - best_change){
4081         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4082             raise++;
4083         }else{
4084             lower++;
4085         }
4086     }else{
4087         from_zero++;
4088     }
4089 }else{
4090     to_zero++;
4091 }
4092 #endif
4093                 for(; last_non_zero>=start_i; last_non_zero--){
4094                     if(block[perm_scantable[last_non_zero]])
4095                         break;
4096                 }
4097             }
4098 #ifdef REFINE_STATS
4099 count++;
4100 if(256*256*256*64 % count == 0){
4101     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4102 }
4103 #endif
4104             run=0;
4105             rle_index=0;
4106             for(i=start_i; i<=last_non_zero; i++){
4107                 int j= perm_scantable[i];
4108                 const int level= block[j];
4109
4110                  if(level){
4111                      run_tab[rle_index++]=run;
4112                      run=0;
4113                  }else{
4114                      run++;
4115                  }
4116             }
4117
4118             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4119         }else{
4120             break;
4121         }
4122     }
4123 #ifdef REFINE_STATS
4124 if(last_non_zero>0){
4125 STOP_TIMER("iterative search")
4126 }
4127 }
4128 #endif
4129
4130     return last_non_zero;
4131 }
4132
4133 int ff_dct_quantize_c(MpegEncContext *s,
4134                         DCTELEM *block, int n,
4135                         int qscale, int *overflow)
4136 {
4137     int i, j, level, last_non_zero, q, start_i;
4138     const int *qmat;
4139     const uint8_t *scantable= s->intra_scantable.scantable;
4140     int bias;
4141     int max=0;
4142     unsigned int threshold1, threshold2;
4143
4144     s->dsp.fdct (block);
4145
4146     if(s->dct_error_sum)
4147         s->denoise_dct(s, block);
4148
4149     if (s->mb_intra) {
4150         if (!s->h263_aic) {
4151             if (n < 4)
4152                 q = s->y_dc_scale;
4153             else
4154                 q = s->c_dc_scale;
4155             q = q << 3;
4156         } else
4157             /* For AIC we skip quant/dequant of INTRADC */
4158             q = 1 << 3;
4159
4160         /* note: block[0] is assumed to be positive */
4161         block[0] = (block[0] + (q >> 1)) / q;
4162         start_i = 1;
4163         last_non_zero = 0;
4164         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4165         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4166     } else {
4167         start_i = 0;
4168         last_non_zero = -1;
4169         qmat = s->q_inter_matrix[qscale];
4170         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4171     }
4172     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4173     threshold2= (threshold1<<1);
4174     for(i=63;i>=start_i;i--) {
4175         j = scantable[i];
4176         level = block[j] * qmat[j];
4177
4178         if(((unsigned)(level+threshold1))>threshold2){
4179             last_non_zero = i;
4180             break;
4181         }else{
4182             block[j]=0;
4183         }
4184     }
4185     for(i=start_i; i<=last_non_zero; i++) {
4186         j = scantable[i];
4187         level = block[j] * qmat[j];
4188
4189 //        if(   bias+level >= (1<<QMAT_SHIFT)
4190 //           || bias-level >= (1<<QMAT_SHIFT)){
4191         if(((unsigned)(level+threshold1))>threshold2){
4192             if(level>0){
4193                 level= (bias + level)>>QMAT_SHIFT;
4194                 block[j]= level;
4195             }else{
4196                 level= (bias - level)>>QMAT_SHIFT;
4197                 block[j]= -level;
4198             }
4199             max |=level;
4200         }else{
4201             block[j]=0;
4202         }
4203     }
4204     *overflow= s->max_qcoeff < max; //overflow might have happened
4205
4206     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4207     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4208         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4209
4210     return last_non_zero;
4211 }
4212
4213 #define OFFSET(x) offsetof(MpegEncContext, x)
4214 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4215 static const AVOption h263_options[] = {
4216     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4217     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4218     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { 0 }, 0, INT_MAX, VE },
4219     FF_MPV_COMMON_OPTS
4220     { NULL },
4221 };
4222
4223 static const AVClass h263_class = {
4224     .class_name = "H.263 encoder",
4225     .item_name  = av_default_item_name,
4226     .option     = h263_options,
4227     .version    = LIBAVUTIL_VERSION_INT,
4228 };
4229
4230 AVCodec ff_h263_encoder = {
4231     .name           = "h263",
4232     .type           = AVMEDIA_TYPE_VIDEO,
4233     .id             = AV_CODEC_ID_H263,
4234     .priv_data_size = sizeof(MpegEncContext),
4235     .init           = ff_MPV_encode_init,
4236     .encode2        = ff_MPV_encode_picture,
4237     .close          = ff_MPV_encode_end,
4238     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4239     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4240     .priv_class     = &h263_class,
4241 };
4242
4243 static const AVOption h263p_options[] = {
4244     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4245     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4246     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4247     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4248     FF_MPV_COMMON_OPTS
4249     { NULL },
4250 };
4251 static const AVClass h263p_class = {
4252     .class_name = "H.263p encoder",
4253     .item_name  = av_default_item_name,
4254     .option     = h263p_options,
4255     .version    = LIBAVUTIL_VERSION_INT,
4256 };
4257
4258 AVCodec ff_h263p_encoder = {
4259     .name           = "h263p",
4260     .type           = AVMEDIA_TYPE_VIDEO,
4261     .id             = AV_CODEC_ID_H263P,
4262     .priv_data_size = sizeof(MpegEncContext),
4263     .init           = ff_MPV_encode_init,
4264     .encode2        = ff_MPV_encode_picture,
4265     .close          = ff_MPV_encode_end,
4266     .capabilities   = CODEC_CAP_SLICE_THREADS,
4267     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4268     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4269     .priv_class     = &h263p_class,
4270 };
4271
4272 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4273
4274 AVCodec ff_msmpeg4v2_encoder = {
4275     .name           = "msmpeg4v2",
4276     .type           = AVMEDIA_TYPE_VIDEO,
4277     .id             = AV_CODEC_ID_MSMPEG4V2,
4278     .priv_data_size = sizeof(MpegEncContext),
4279     .init           = ff_MPV_encode_init,
4280     .encode2        = ff_MPV_encode_picture,
4281     .close          = ff_MPV_encode_end,
4282     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4283     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4284     .priv_class     = &msmpeg4v2_class,
4285 };
4286
4287 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4288
4289 AVCodec ff_msmpeg4v3_encoder = {
4290     .name           = "msmpeg4",
4291     .type           = AVMEDIA_TYPE_VIDEO,
4292     .id             = AV_CODEC_ID_MSMPEG4V3,
4293     .priv_data_size = sizeof(MpegEncContext),
4294     .init           = ff_MPV_encode_init,
4295     .encode2        = ff_MPV_encode_picture,
4296     .close          = ff_MPV_encode_end,
4297     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4298     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4299     .priv_class     = &msmpeg4v3_class,
4300 };
4301
4302 FF_MPV_GENERIC_CLASS(wmv1)
4303
4304 AVCodec ff_wmv1_encoder = {
4305     .name           = "wmv1",
4306     .type           = AVMEDIA_TYPE_VIDEO,
4307     .id             = AV_CODEC_ID_WMV1,
4308     .priv_data_size = sizeof(MpegEncContext),
4309     .init           = ff_MPV_encode_init,
4310     .encode2        = ff_MPV_encode_picture,
4311     .close          = ff_MPV_encode_end,
4312     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4313     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4314     .priv_class     = &wmv1_class,
4315 };