]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge remote-tracking branch 'qatar/master'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "mpegvideo_common.h"
37 #include "h263.h"
38 #include "mjpegenc.h"
39 #include "msmpeg4.h"
40 #include "faandct.h"
41 #include "thread.h"
42 #include "aandcttab.h"
43 #include "flv.h"
44 #include "mpeg4video.h"
45 #include "internal.h"
46 #include "bytestream.h"
47 #include <limits.h>
48 #include "sp5x.h"
49
50 //#undef NDEBUG
51 //#include <assert.h>
52
53 static int encode_picture(MpegEncContext *s, int picture_number);
54 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
55 static int sse_mb(MpegEncContext *s);
56 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
57 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
58
59 /* enable all paranoid tests for rounding, overflows, etc... */
60 //#define PARANOID
61
62 //#define DEBUG
63
64 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
65 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
66
67 const AVOption ff_mpv_generic_options[] = {
68     FF_MPV_COMMON_OPTS
69     { NULL },
70 };
71
72 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
73                        uint16_t (*qmat16)[2][64],
74                        const uint16_t *quant_matrix,
75                        int bias, int qmin, int qmax, int intra)
76 {
77     int qscale;
78     int shift = 0;
79
80     for (qscale = qmin; qscale <= qmax; qscale++) {
81         int i;
82         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
83             dsp->fdct == ff_jpeg_fdct_islow_10 ||
84             dsp->fdct == ff_faandct) {
85             for (i = 0; i < 64; i++) {
86                 const int j = dsp->idct_permutation[i];
87                 /* 16 <= qscale * quant_matrix[i] <= 7905
88                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
89                  *             19952 <=              x  <= 249205026
90                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
91                  *           3444240 >= (1 << 36) / (x) >= 275 */
92
93                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
94                                         (qscale * quant_matrix[j]));
95             }
96         } else if (dsp->fdct == ff_fdct_ifast) {
97             for (i = 0; i < 64; i++) {
98                 const int j = dsp->idct_permutation[i];
99                 /* 16 <= qscale * quant_matrix[i] <= 7905
100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
101                  *             19952 <=              x  <= 249205026
102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
103                  *           3444240 >= (1 << 36) / (x) >= 275 */
104
105                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
106                                         (ff_aanscales[i] * qscale * quant_matrix[j]));
107             }
108         } else {
109             for (i = 0; i < 64; i++) {
110                 const int j = dsp->idct_permutation[i];
111                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
112                  * Assume x = qscale * quant_matrix[i]
113                  * So             16 <=              x  <= 7905
114                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
115                  * so          32768 >= (1 << 19) / (x) >= 67 */
116                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
117                                         (qscale * quant_matrix[j]));
118                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
119                 //                    (qscale * quant_matrix[i]);
120                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
121                                        (qscale * quant_matrix[j]);
122
123                 if (qmat16[qscale][0][i] == 0 ||
124                     qmat16[qscale][0][i] == 128 * 256)
125                     qmat16[qscale][0][i] = 128 * 256 - 1;
126                 qmat16[qscale][1][i] =
127                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
128                                 qmat16[qscale][0][i]);
129             }
130         }
131
132         for (i = intra; i < 64; i++) {
133             int64_t max = 8191;
134             if (dsp->fdct == ff_fdct_ifast) {
135                 max = (8191LL * ff_aanscales[i]) >> 14;
136             }
137             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
138                 shift++;
139             }
140         }
141     }
142     if (shift) {
143         av_log(NULL, AV_LOG_INFO,
144                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
145                QMAT_SHIFT - shift);
146     }
147 }
148
149 static inline void update_qscale(MpegEncContext *s)
150 {
151     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
152                 (FF_LAMBDA_SHIFT + 7);
153     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
154
155     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
156                  FF_LAMBDA_SHIFT;
157 }
158
159 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
160 {
161     int i;
162
163     if (matrix) {
164         put_bits(pb, 1, 1);
165         for (i = 0; i < 64; i++) {
166             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
167         }
168     } else
169         put_bits(pb, 1, 0);
170 }
171
172 /**
173  * init s->current_picture.qscale_table from s->lambda_table
174  */
175 void ff_init_qscale_tab(MpegEncContext *s)
176 {
177     int8_t * const qscale_table = s->current_picture.f.qscale_table;
178     int i;
179
180     for (i = 0; i < s->mb_num; i++) {
181         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
182         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
183         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
184                                                   s->avctx->qmax);
185     }
186 }
187
188 static void copy_picture_attributes(MpegEncContext *s,
189                                     AVFrame *dst,
190                                     AVFrame *src)
191 {
192     int i;
193
194     dst->pict_type              = src->pict_type;
195     dst->quality                = src->quality;
196     dst->coded_picture_number   = src->coded_picture_number;
197     dst->display_picture_number = src->display_picture_number;
198     //dst->reference              = src->reference;
199     dst->pts                    = src->pts;
200     dst->interlaced_frame       = src->interlaced_frame;
201     dst->top_field_first        = src->top_field_first;
202
203     if (s->avctx->me_threshold) {
204         if (!src->motion_val[0])
205             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
206         if (!src->mb_type)
207             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
208         if (!src->ref_index[0])
209             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
210         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
211             av_log(s->avctx, AV_LOG_ERROR,
212                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
213                    src->motion_subsample_log2, dst->motion_subsample_log2);
214
215         memcpy(dst->mb_type, src->mb_type,
216                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
217
218         for (i = 0; i < 2; i++) {
219             int stride = ((16 * s->mb_width ) >>
220                           src->motion_subsample_log2) + 1;
221             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
222
223             if (src->motion_val[i] &&
224                 src->motion_val[i] != dst->motion_val[i]) {
225                 memcpy(dst->motion_val[i], src->motion_val[i],
226                        2 * stride * height * sizeof(int16_t));
227             }
228             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
229                 memcpy(dst->ref_index[i], src->ref_index[i],
230                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
231             }
232         }
233     }
234 }
235
236 static void update_duplicate_context_after_me(MpegEncContext *dst,
237                                               MpegEncContext *src)
238 {
239 #define COPY(a) dst->a= src->a
240     COPY(pict_type);
241     COPY(current_picture);
242     COPY(f_code);
243     COPY(b_code);
244     COPY(qscale);
245     COPY(lambda);
246     COPY(lambda2);
247     COPY(picture_in_gop_number);
248     COPY(gop_picture_number);
249     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
250     COPY(progressive_frame);    // FIXME don't set in encode_header
251     COPY(partitioned_frame);    // FIXME don't set in encode_header
252 #undef COPY
253 }
254
255 /**
256  * Set the given MpegEncContext to defaults for encoding.
257  * the changed fields will not depend upon the prior state of the MpegEncContext.
258  */
259 static void MPV_encode_defaults(MpegEncContext *s)
260 {
261     int i;
262     ff_MPV_common_defaults(s);
263
264     for (i = -16; i < 16; i++) {
265         default_fcode_tab[i + MAX_MV] = 1;
266     }
267     s->me.mv_penalty = default_mv_penalty;
268     s->fcode_tab     = default_fcode_tab;
269 }
270
271 /* init video encoder */
272 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
273 {
274     MpegEncContext *s = avctx->priv_data;
275     int i;
276     int chroma_h_shift, chroma_v_shift;
277
278     MPV_encode_defaults(s);
279
280     switch (avctx->codec_id) {
281     case CODEC_ID_MPEG2VIDEO:
282         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
283             avctx->pix_fmt != PIX_FMT_YUV422P) {
284             av_log(avctx, AV_LOG_ERROR,
285                    "only YUV420 and YUV422 are supported\n");
286             return -1;
287         }
288         break;
289     case CODEC_ID_LJPEG:
290         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
291             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
292             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
293             avctx->pix_fmt != PIX_FMT_BGR0     &&
294             avctx->pix_fmt != PIX_FMT_BGRA     &&
295             avctx->pix_fmt != PIX_FMT_BGR24    &&
296             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
297               avctx->pix_fmt != PIX_FMT_YUV422P &&
298               avctx->pix_fmt != PIX_FMT_YUV444P) ||
299              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
300             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
301             return -1;
302         }
303         break;
304     case CODEC_ID_MJPEG:
305     case CODEC_ID_AMV:
306         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
307             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
308             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
309               avctx->pix_fmt != PIX_FMT_YUV422P) ||
310              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
311             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
312             return -1;
313         }
314         break;
315     default:
316         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
317             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
318             return -1;
319         }
320     }
321
322     switch (avctx->pix_fmt) {
323     case PIX_FMT_YUVJ422P:
324     case PIX_FMT_YUV422P:
325         s->chroma_format = CHROMA_422;
326         break;
327     case PIX_FMT_YUVJ420P:
328     case PIX_FMT_YUV420P:
329     default:
330         s->chroma_format = CHROMA_420;
331         break;
332     }
333
334     s->bit_rate = avctx->bit_rate;
335     s->width    = avctx->width;
336     s->height   = avctx->height;
337     if (avctx->gop_size > 600 &&
338         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
339         av_log(avctx, AV_LOG_WARNING,
340                "keyframe interval too large!, reducing it from %d to %d\n",
341                avctx->gop_size, 600);
342         avctx->gop_size = 600;
343     }
344     s->gop_size     = avctx->gop_size;
345     s->avctx        = avctx;
346     s->flags        = avctx->flags;
347     s->flags2       = avctx->flags2;
348     s->max_b_frames = avctx->max_b_frames;
349     s->codec_id     = avctx->codec->id;
350 #if FF_API_MPV_GLOBAL_OPTS
351     if (avctx->luma_elim_threshold)
352         s->luma_elim_threshold   = avctx->luma_elim_threshold;
353     if (avctx->chroma_elim_threshold)
354         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
355 #endif
356     s->strict_std_compliance = avctx->strict_std_compliance;
357     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
358     s->mpeg_quant         = avctx->mpeg_quant;
359     s->rtp_mode           = !!avctx->rtp_payload_size;
360     s->intra_dc_precision = avctx->intra_dc_precision;
361     s->user_specified_pts = AV_NOPTS_VALUE;
362
363     if (s->gop_size <= 1) {
364         s->intra_only = 1;
365         s->gop_size   = 12;
366     } else {
367         s->intra_only = 0;
368     }
369
370     s->me_method = avctx->me_method;
371
372     /* Fixed QSCALE */
373     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
374
375 #if FF_API_MPV_GLOBAL_OPTS
376     if (s->flags & CODEC_FLAG_QP_RD)
377         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
378 #endif
379
380     s->adaptive_quant = (s->avctx->lumi_masking ||
381                          s->avctx->dark_masking ||
382                          s->avctx->temporal_cplx_masking ||
383                          s->avctx->spatial_cplx_masking  ||
384                          s->avctx->p_masking      ||
385                          s->avctx->border_masking ||
386                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
387                         !s->fixed_qscale;
388
389     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
390
391     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
392         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
393         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
394             return -1;
395     }
396
397     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
398         av_log(avctx, AV_LOG_INFO,
399                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
400     }
401
402     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
403         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
404         return -1;
405     }
406
407     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
408         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
409         return -1;
410     }
411
412     if (avctx->rc_max_rate &&
413         avctx->rc_max_rate == avctx->bit_rate &&
414         avctx->rc_max_rate != avctx->rc_min_rate) {
415         av_log(avctx, AV_LOG_INFO,
416                "impossible bitrate constraints, this will fail\n");
417     }
418
419     if (avctx->rc_buffer_size &&
420         avctx->bit_rate * (int64_t)avctx->time_base.num >
421             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
422         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
423         return -1;
424     }
425
426     if (!s->fixed_qscale &&
427         avctx->bit_rate * av_q2d(avctx->time_base) >
428             avctx->bit_rate_tolerance) {
429         av_log(avctx, AV_LOG_ERROR,
430                "bitrate tolerance too small for bitrate\n");
431         return -1;
432     }
433
434     if (s->avctx->rc_max_rate &&
435         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
436         (s->codec_id == CODEC_ID_MPEG1VIDEO ||
437          s->codec_id == CODEC_ID_MPEG2VIDEO) &&
438         90000LL * (avctx->rc_buffer_size - 1) >
439             s->avctx->rc_max_rate * 0xFFFFLL) {
440         av_log(avctx, AV_LOG_INFO,
441                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
442                "specified vbv buffer is too large for the given bitrate!\n");
443     }
444
445     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != CODEC_ID_MPEG4 &&
446         s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P &&
447         s->codec_id != CODEC_ID_FLV1) {
448         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
449         return -1;
450     }
451
452     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
453         av_log(avctx, AV_LOG_ERROR,
454                "OBMC is only supported with simple mb decision\n");
455         return -1;
456     }
457
458     if (s->quarter_sample && s->codec_id != CODEC_ID_MPEG4) {
459         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
460         return -1;
461     }
462
463     if (s->max_b_frames                    &&
464         s->codec_id != CODEC_ID_MPEG4      &&
465         s->codec_id != CODEC_ID_MPEG1VIDEO &&
466         s->codec_id != CODEC_ID_MPEG2VIDEO) {
467         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
468         return -1;
469     }
470
471     if ((s->codec_id == CODEC_ID_MPEG4 ||
472          s->codec_id == CODEC_ID_H263  ||
473          s->codec_id == CODEC_ID_H263P) &&
474         (avctx->sample_aspect_ratio.num > 255 ||
475          avctx->sample_aspect_ratio.den > 255)) {
476         av_log(avctx, AV_LOG_WARNING,
477                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
478                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
479         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
480                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
481     }
482
483     if ((s->codec_id == CODEC_ID_H263  ||
484          s->codec_id == CODEC_ID_H263P) &&
485         (avctx->width  > 2048 ||
486          avctx->height > 1152 )) {
487         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
488         return -1;
489     }
490     if ((s->codec_id == CODEC_ID_H263  ||
491          s->codec_id == CODEC_ID_H263P) &&
492         ((avctx->width &3) ||
493          (avctx->height&3) )) {
494         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
495         return -1;
496     }
497
498     if (s->codec_id == CODEC_ID_MPEG1VIDEO &&
499         (avctx->width  > 4095 ||
500          avctx->height > 4095 )) {
501         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
502         return -1;
503     }
504
505     if (s->codec_id == CODEC_ID_MPEG2VIDEO &&
506         (avctx->width  > 16383 ||
507          avctx->height > 16383 )) {
508         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
509         return -1;
510     }
511
512     if ((s->codec_id == CODEC_ID_WMV1 ||
513          s->codec_id == CODEC_ID_WMV2) &&
514          avctx->width & 1) {
515          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
516          return -1;
517     }
518
519     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
520         s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO) {
521         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
522         return -1;
523     }
524
525     // FIXME mpeg2 uses that too
526     if (s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4) {
527         av_log(avctx, AV_LOG_ERROR,
528                "mpeg2 style quantization not supported by codec\n");
529         return -1;
530     }
531
532 #if FF_API_MPV_GLOBAL_OPTS
533     if (s->flags & CODEC_FLAG_CBP_RD)
534         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
535 #endif
536
537     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
538         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
539         return -1;
540     }
541
542     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
543         s->avctx->mb_decision != FF_MB_DECISION_RD) {
544         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
545         return -1;
546     }
547
548     if (s->avctx->scenechange_threshold < 1000000000 &&
549         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
550         av_log(avctx, AV_LOG_ERROR,
551                "closed gop with scene change detection are not supported yet, "
552                "set threshold to 1000000000\n");
553         return -1;
554     }
555
556     if (s->flags & CODEC_FLAG_LOW_DELAY) {
557         if (s->codec_id != CODEC_ID_MPEG2VIDEO) {
558             av_log(avctx, AV_LOG_ERROR,
559                   "low delay forcing is only available for mpeg2\n");
560             return -1;
561         }
562         if (s->max_b_frames != 0) {
563             av_log(avctx, AV_LOG_ERROR,
564                    "b frames cannot be used with low delay\n");
565             return -1;
566         }
567     }
568
569     if (s->q_scale_type == 1) {
570         if (avctx->qmax > 12) {
571             av_log(avctx, AV_LOG_ERROR,
572                    "non linear quant only supports qmax <= 12 currently\n");
573             return -1;
574         }
575     }
576
577     if (s->avctx->thread_count > 1         &&
578         s->codec_id != CODEC_ID_MPEG4      &&
579         s->codec_id != CODEC_ID_MPEG1VIDEO &&
580         s->codec_id != CODEC_ID_MPEG2VIDEO &&
581         (s->codec_id != CODEC_ID_H263P)) {
582         av_log(avctx, AV_LOG_ERROR,
583                "multi threaded encoding not supported by codec\n");
584         return -1;
585     }
586
587     if (s->avctx->thread_count < 1) {
588         av_log(avctx, AV_LOG_ERROR,
589                "automatic thread number detection not supported by codec, "
590                "patch welcome\n");
591         return -1;
592     }
593
594     if (s->avctx->thread_count > 1)
595         s->rtp_mode = 1;
596
597     if (!avctx->time_base.den || !avctx->time_base.num) {
598         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
599         return -1;
600     }
601
602     i = (INT_MAX / 2 + 128) >> 8;
603     if (avctx->me_threshold >= i) {
604         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
605                i - 1);
606         return -1;
607     }
608     if (avctx->mb_threshold >= i) {
609         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
610                i - 1);
611         return -1;
612     }
613
614     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
615         av_log(avctx, AV_LOG_INFO,
616                "notice: b_frame_strategy only affects the first pass\n");
617         avctx->b_frame_strategy = 0;
618     }
619
620     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
621     if (i > 1) {
622         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
623         avctx->time_base.den /= i;
624         avctx->time_base.num /= i;
625         //return -1;
626     }
627
628     if (s->mpeg_quant || s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || s->codec_id == CODEC_ID_MJPEG || s->codec_id==CODEC_ID_AMV) {
629         // (a + x * 3 / 8) / x
630         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
631         s->inter_quant_bias = 0;
632     } else {
633         s->intra_quant_bias = 0;
634         // (a - x / 4) / x
635         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
636     }
637
638     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
639         s->intra_quant_bias = avctx->intra_quant_bias;
640     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
641         s->inter_quant_bias = avctx->inter_quant_bias;
642
643     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
644
645     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
646                                   &chroma_v_shift);
647
648     if (avctx->codec_id == CODEC_ID_MPEG4 &&
649         s->avctx->time_base.den > (1 << 16) - 1) {
650         av_log(avctx, AV_LOG_ERROR,
651                "timebase %d/%d not supported by MPEG 4 standard, "
652                "the maximum admitted value for the timebase denominator "
653                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
654                (1 << 16) - 1);
655         return -1;
656     }
657     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
658
659 #if FF_API_MPV_GLOBAL_OPTS
660     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
661         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
662     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
663         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
664     if (avctx->quantizer_noise_shaping)
665         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
666 #endif
667
668     switch (avctx->codec->id) {
669     case CODEC_ID_MPEG1VIDEO:
670         s->out_format = FMT_MPEG1;
671         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
672         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
673         break;
674     case CODEC_ID_MPEG2VIDEO:
675         s->out_format = FMT_MPEG1;
676         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
677         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
678         s->rtp_mode   = 1;
679         break;
680     case CODEC_ID_LJPEG:
681     case CODEC_ID_MJPEG:
682     case CODEC_ID_AMV:
683         s->out_format = FMT_MJPEG;
684         s->intra_only = 1; /* force intra only for jpeg */
685         if (avctx->codec->id == CODEC_ID_LJPEG &&
686             (avctx->pix_fmt == PIX_FMT_BGR0
687              || s->avctx->pix_fmt == PIX_FMT_BGRA
688              || s->avctx->pix_fmt == PIX_FMT_BGR24)) {
689             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
690             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
691             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
692         } else {
693             s->mjpeg_vsample[0] = 2;
694             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
695             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
696             s->mjpeg_hsample[0] = 2;
697             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
698             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
699         }
700         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
701             ff_mjpeg_encode_init(s) < 0)
702             return -1;
703         avctx->delay = 0;
704         s->low_delay = 1;
705         break;
706     case CODEC_ID_H261:
707         if (!CONFIG_H261_ENCODER)
708             return -1;
709         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
710             av_log(avctx, AV_LOG_ERROR,
711                    "The specified picture size of %dx%d is not valid for the "
712                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
713                     s->width, s->height);
714             return -1;
715         }
716         s->out_format = FMT_H261;
717         avctx->delay  = 0;
718         s->low_delay  = 1;
719         break;
720     case CODEC_ID_H263:
721         if (!CONFIG_H263_ENCODER)
722             return -1;
723         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
724                              s->width, s->height) == 8) {
725             av_log(avctx, AV_LOG_ERROR,
726                    "The specified picture size of %dx%d is not valid for "
727                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
728                    "352x288, 704x576, and 1408x1152. "
729                    "Try H.263+.\n", s->width, s->height);
730             return -1;
731         }
732         s->out_format = FMT_H263;
733         avctx->delay  = 0;
734         s->low_delay  = 1;
735         break;
736     case CODEC_ID_H263P:
737         s->out_format = FMT_H263;
738         s->h263_plus  = 1;
739         /* Fx */
740         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
741         s->modified_quant  = s->h263_aic;
742         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
743         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
744
745         /* /Fx */
746         /* These are just to be sure */
747         avctx->delay = 0;
748         s->low_delay = 1;
749         break;
750     case CODEC_ID_FLV1:
751         s->out_format      = FMT_H263;
752         s->h263_flv        = 2; /* format = 1; 11-bit codes */
753         s->unrestricted_mv = 1;
754         s->rtp_mode  = 0; /* don't allow GOB */
755         avctx->delay = 0;
756         s->low_delay = 1;
757         break;
758     case CODEC_ID_RV10:
759         s->out_format = FMT_H263;
760         avctx->delay  = 0;
761         s->low_delay  = 1;
762         break;
763     case CODEC_ID_RV20:
764         s->out_format      = FMT_H263;
765         avctx->delay       = 0;
766         s->low_delay       = 1;
767         s->modified_quant  = 1;
768         s->h263_aic        = 1;
769         s->h263_plus       = 1;
770         s->loop_filter     = 1;
771         s->unrestricted_mv = 0;
772         break;
773     case CODEC_ID_MPEG4:
774         s->out_format      = FMT_H263;
775         s->h263_pred       = 1;
776         s->unrestricted_mv = 1;
777         s->low_delay       = s->max_b_frames ? 0 : 1;
778         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
779         break;
780     case CODEC_ID_MSMPEG4V2:
781         s->out_format      = FMT_H263;
782         s->h263_pred       = 1;
783         s->unrestricted_mv = 1;
784         s->msmpeg4_version = 2;
785         avctx->delay       = 0;
786         s->low_delay       = 1;
787         break;
788     case CODEC_ID_MSMPEG4V3:
789         s->out_format        = FMT_H263;
790         s->h263_pred         = 1;
791         s->unrestricted_mv   = 1;
792         s->msmpeg4_version   = 3;
793         s->flipflop_rounding = 1;
794         avctx->delay         = 0;
795         s->low_delay         = 1;
796         break;
797     case CODEC_ID_WMV1:
798         s->out_format        = FMT_H263;
799         s->h263_pred         = 1;
800         s->unrestricted_mv   = 1;
801         s->msmpeg4_version   = 4;
802         s->flipflop_rounding = 1;
803         avctx->delay         = 0;
804         s->low_delay         = 1;
805         break;
806     case CODEC_ID_WMV2:
807         s->out_format        = FMT_H263;
808         s->h263_pred         = 1;
809         s->unrestricted_mv   = 1;
810         s->msmpeg4_version   = 5;
811         s->flipflop_rounding = 1;
812         avctx->delay         = 0;
813         s->low_delay         = 1;
814         break;
815     default:
816         return -1;
817     }
818
819     avctx->has_b_frames = !s->low_delay;
820
821     s->encoding = 1;
822
823     s->progressive_frame    =
824     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
825                                                 CODEC_FLAG_INTERLACED_ME) ||
826                                 s->alternate_scan);
827
828     /* init */
829     if (ff_MPV_common_init(s) < 0)
830         return -1;
831
832     if (!s->dct_quantize)
833         s->dct_quantize = ff_dct_quantize_c;
834     if (!s->denoise_dct)
835         s->denoise_dct  = denoise_dct_c;
836     s->fast_dct_quantize = s->dct_quantize;
837     if (avctx->trellis)
838         s->dct_quantize  = dct_quantize_trellis_c;
839
840     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
841         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
842
843     s->quant_precision = 5;
844
845     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
846     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
847
848     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
849         ff_h261_encode_init(s);
850     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
851         ff_h263_encode_init(s);
852     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
853         ff_msmpeg4_encode_init(s);
854     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
855         && s->out_format == FMT_MPEG1)
856         ff_mpeg1_encode_init(s);
857
858     /* init q matrix */
859     for (i = 0; i < 64; i++) {
860         int j = s->dsp.idct_permutation[i];
861         if (CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4 &&
862             s->mpeg_quant) {
863             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
864             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
865         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
866             s->intra_matrix[j] =
867             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
868         } else {
869             /* mpeg1/2 */
870             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
871             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
872         }
873         if (s->avctx->intra_matrix)
874             s->intra_matrix[j] = s->avctx->intra_matrix[i];
875         if (s->avctx->inter_matrix)
876             s->inter_matrix[j] = s->avctx->inter_matrix[i];
877     }
878
879     /* precompute matrix */
880     /* for mjpeg, we do include qscale in the matrix */
881     if (s->out_format != FMT_MJPEG) {
882         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
883                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
884                           31, 1);
885         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
886                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
887                           31, 0);
888     }
889
890     if (ff_rate_control_init(s) < 0)
891         return -1;
892
893     return 0;
894 }
895
896 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
897 {
898     MpegEncContext *s = avctx->priv_data;
899
900     ff_rate_control_uninit(s);
901
902     ff_MPV_common_end(s);
903     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
904         s->out_format == FMT_MJPEG)
905         ff_mjpeg_encode_close(s);
906
907     av_freep(&avctx->extradata);
908
909     return 0;
910 }
911
/**
 * Sum of absolute differences between each pixel of a 16x16 block and a
 * single reference value.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return the accumulated absolute differences over all 256 pixels
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
925
926 static int get_intra_count(MpegEncContext *s, uint8_t *src,
927                            uint8_t *ref, int stride)
928 {
929     int x, y, w, h;
930     int acc = 0;
931
932     w = s->width  & ~15;
933     h = s->height & ~15;
934
935     for (y = 0; y < h; y += 16) {
936         for (x = 0; x < w; x += 16) {
937             int offset = x + y * stride;
938             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
939                                      16);
940             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
941             int sae  = get_sae(src + offset, mean, stride);
942
943             acc += sae + 500 < sad;
944         }
945     }
946     return acc;
947 }
948
949
950 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
951 {
952     AVFrame *pic = NULL;
953     int64_t pts;
954     int i;
955     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
956                                                  (s->low_delay ? 0 : 1);
957     int direct = 1;
958
959     if (pic_arg) {
960         pts = pic_arg->pts;
961         pic_arg->display_picture_number = s->input_picture_number++;
962
963         if (pts != AV_NOPTS_VALUE) {
964             if (s->user_specified_pts != AV_NOPTS_VALUE) {
965                 int64_t time = pts;
966                 int64_t last = s->user_specified_pts;
967
968                 if (time <= last) {
969                     av_log(s->avctx, AV_LOG_ERROR,
970                            "Error, Invalid timestamp=%"PRId64", "
971                            "last=%"PRId64"\n", pts, s->user_specified_pts);
972                     return -1;
973                 }
974
975                 if (!s->low_delay && pic_arg->display_picture_number == 1)
976                     s->dts_delta = time - last;
977             }
978             s->user_specified_pts = pts;
979         } else {
980             if (s->user_specified_pts != AV_NOPTS_VALUE) {
981                 s->user_specified_pts =
982                 pts = s->user_specified_pts + 1;
983                 av_log(s->avctx, AV_LOG_INFO,
984                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
985                        pts);
986             } else {
987                 pts = pic_arg->display_picture_number;
988             }
989         }
990     }
991
992   if (pic_arg) {
993     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
994         direct = 0;
995     if (pic_arg->linesize[0] != s->linesize)
996         direct = 0;
997     if (pic_arg->linesize[1] != s->uvlinesize)
998         direct = 0;
999     if (pic_arg->linesize[2] != s->uvlinesize)
1000         direct = 0;
1001
1002     //av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0],
1003     //       pic_arg->linesize[1], s->linesize, s->uvlinesize);
1004
1005     if (direct) {
1006         i = ff_find_unused_picture(s, 1);
1007         if (i < 0)
1008             return i;
1009
1010         pic = &s->picture[i].f;
1011         pic->reference = 3;
1012
1013         for (i = 0; i < 4; i++) {
1014             pic->data[i]     = pic_arg->data[i];
1015             pic->linesize[i] = pic_arg->linesize[i];
1016         }
1017         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
1018             return -1;
1019         }
1020     } else {
1021         i = ff_find_unused_picture(s, 0);
1022         if (i < 0)
1023             return i;
1024
1025         pic = &s->picture[i].f;
1026         pic->reference = 3;
1027
1028         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
1029             return -1;
1030         }
1031
1032         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1033             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1034             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1035             // empty
1036         } else {
1037             int h_chroma_shift, v_chroma_shift;
1038             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
1039                                           &v_chroma_shift);
1040
1041             for (i = 0; i < 3; i++) {
1042                 int src_stride = pic_arg->linesize[i];
1043                 int dst_stride = i ? s->uvlinesize : s->linesize;
1044                 int h_shift = i ? h_chroma_shift : 0;
1045                 int v_shift = i ? v_chroma_shift : 0;
1046                 int w = s->width  >> h_shift;
1047                 int h = s->height >> v_shift;
1048                 uint8_t *src = pic_arg->data[i];
1049                 uint8_t *dst = pic->data[i];
1050
1051                 if(s->codec_id == CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)){
1052                     h= ((s->height+15)/16*16)>>v_shift;
1053                 }
1054
1055                 if (!s->avctx->rc_buffer_size)
1056                     dst += INPLACE_OFFSET;
1057
1058                 if (src_stride == dst_stride)
1059                     memcpy(dst, src, src_stride * h);
1060                 else {
1061                     while (h--) {
1062                         memcpy(dst, src, w);
1063                         dst += dst_stride;
1064                         src += src_stride;
1065                     }
1066                 }
1067             }
1068         }
1069     }
1070     copy_picture_attributes(s, pic, pic_arg);
1071     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1072   }
1073
1074     /* shift buffer entries */
1075     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1076         s->input_picture[i - 1] = s->input_picture[i];
1077
1078     s->input_picture[encoding_delay] = (Picture*) pic;
1079
1080     return 0;
1081 }
1082
1083 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1084 {
1085     int x, y, plane;
1086     int score = 0;
1087     int64_t score64 = 0;
1088
1089     for (plane = 0; plane < 3; plane++) {
1090         const int stride = p->f.linesize[plane];
1091         const int bw = plane ? 1 : 2;
1092         for (y = 0; y < s->mb_height * bw; y++) {
1093             for (x = 0; x < s->mb_width * bw; x++) {
1094                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1095                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1096                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1097                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1098
1099                 switch (s->avctx->frame_skip_exp) {
1100                 case 0: score    =  FFMAX(score, v);          break;
1101                 case 1: score   += FFABS(v);                  break;
1102                 case 2: score   += v * v;                     break;
1103                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1104                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1105                 }
1106             }
1107         }
1108     }
1109
1110     if (score)
1111         score64 = score;
1112
1113     if (score64 < s->avctx->frame_skip_threshold)
1114         return 1;
1115     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1116         return 1;
1117     return 0;
1118 }
1119
1120 static int estimate_best_b_count(MpegEncContext *s)
1121 {
1122     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1123     AVCodecContext *c = avcodec_alloc_context3(NULL);
1124     AVFrame input[FF_MAX_B_FRAMES + 2];
1125     const int scale = s->avctx->brd_scale;
1126     int i, j, out_size, p_lambda, b_lambda, lambda2;
1127     int outbuf_size  = s->width * s->height; // FIXME
1128     uint8_t *outbuf  = av_malloc(outbuf_size);
1129     int64_t best_rd  = INT64_MAX;
1130     int best_b_count = -1;
1131
1132     assert(scale >= 0 && scale <= 3);
1133
1134     //emms_c();
1135     //s->next_picture_ptr->quality;
1136     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1137     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1138     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1139     if (!b_lambda) // FIXME we should do this somewhere else
1140         b_lambda = p_lambda;
1141     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1142                FF_LAMBDA_SHIFT;
1143
1144     c->width        = s->width  >> scale;
1145     c->height       = s->height >> scale;
1146     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1147                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1148     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1149     c->mb_decision  = s->avctx->mb_decision;
1150     c->me_cmp       = s->avctx->me_cmp;
1151     c->mb_cmp       = s->avctx->mb_cmp;
1152     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1153     c->pix_fmt      = PIX_FMT_YUV420P;
1154     c->time_base    = s->avctx->time_base;
1155     c->max_b_frames = s->max_b_frames;
1156
1157     if (avcodec_open2(c, codec, NULL) < 0)
1158         return -1;
1159
1160     for (i = 0; i < s->max_b_frames + 2; i++) {
1161         int ysize = c->width * c->height;
1162         int csize = (c->width / 2) * (c->height / 2);
1163         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1164                                                 s->next_picture_ptr;
1165
1166         avcodec_get_frame_defaults(&input[i]);
1167         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1168         input[i].data[1]     = input[i].data[0] + ysize;
1169         input[i].data[2]     = input[i].data[1] + csize;
1170         input[i].linesize[0] = c->width;
1171         input[i].linesize[1] =
1172         input[i].linesize[2] = c->width / 2;
1173
1174         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1175             pre_input = *pre_input_ptr;
1176
1177             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1178                 pre_input.f.data[0] += INPLACE_OFFSET;
1179                 pre_input.f.data[1] += INPLACE_OFFSET;
1180                 pre_input.f.data[2] += INPLACE_OFFSET;
1181             }
1182
1183             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1184                                  pre_input.f.data[0], pre_input.f.linesize[0],
1185                                  c->width,      c->height);
1186             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1187                                  pre_input.f.data[1], pre_input.f.linesize[1],
1188                                  c->width >> 1, c->height >> 1);
1189             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1190                                  pre_input.f.data[2], pre_input.f.linesize[2],
1191                                  c->width >> 1, c->height >> 1);
1192         }
1193     }
1194
1195     for (j = 0; j < s->max_b_frames + 1; j++) {
1196         int64_t rd = 0;
1197
1198         if (!s->input_picture[j])
1199             break;
1200
1201         c->error[0] = c->error[1] = c->error[2] = 0;
1202
1203         input[0].pict_type = AV_PICTURE_TYPE_I;
1204         input[0].quality   = 1 * FF_QP2LAMBDA;
1205         out_size           = avcodec_encode_video(c, outbuf,
1206                                                   outbuf_size, &input[0]);
1207         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1208
1209         for (i = 0; i < s->max_b_frames + 1; i++) {
1210             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1211
1212             input[i + 1].pict_type = is_p ?
1213                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1214             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1215             out_size = avcodec_encode_video(c, outbuf, outbuf_size,
1216                                             &input[i + 1]);
1217             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1218         }
1219
1220         /* get the delayed frames */
1221         while (out_size) {
1222             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
1223             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1224         }
1225
1226         rd += c->error[0] + c->error[1] + c->error[2];
1227
1228         if (rd < best_rd) {
1229             best_rd = rd;
1230             best_b_count = j;
1231         }
1232     }
1233
1234     av_freep(&outbuf);
1235     avcodec_close(c);
1236     av_freep(&c);
1237
1238     for (i = 0; i < s->max_b_frames + 2; i++) {
1239         av_freep(&input[i].data[0]);
1240     }
1241
1242     return best_b_count;
1243 }
1244
1245 static int select_input_picture(MpegEncContext *s)
1246 {
1247     int i;
1248
1249     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1250         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1251     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1252
1253     /* set next picture type & ordering */
1254     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1255         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1256             s->next_picture_ptr == NULL || s->intra_only) {
1257             s->reordered_input_picture[0] = s->input_picture[0];
1258             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1259             s->reordered_input_picture[0]->f.coded_picture_number =
1260                 s->coded_picture_number++;
1261         } else {
1262             int b_frames;
1263
1264             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1265                 if (s->picture_in_gop_number < s->gop_size &&
1266                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1267                     // FIXME check that te gop check above is +-1 correct
1268                     //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n",
1269                     //       s->input_picture[0]->f.data[0],
1270                     //       s->input_picture[0]->pts);
1271
1272                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1273                         for (i = 0; i < 4; i++)
1274                             s->input_picture[0]->f.data[i] = NULL;
1275                         s->input_picture[0]->f.type = 0;
1276                     } else {
1277                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1278                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1279
1280                         s->avctx->release_buffer(s->avctx,
1281                                                  &s->input_picture[0]->f);
1282                     }
1283
1284                     emms_c();
1285                     ff_vbv_update(s, 0);
1286
1287                     goto no_output_pic;
1288                 }
1289             }
1290
1291             if (s->flags & CODEC_FLAG_PASS2) {
1292                 for (i = 0; i < s->max_b_frames + 1; i++) {
1293                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1294
1295                     if (pict_num >= s->rc_context.num_entries)
1296                         break;
1297                     if (!s->input_picture[i]) {
1298                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1299                         break;
1300                     }
1301
1302                     s->input_picture[i]->f.pict_type =
1303                         s->rc_context.entry[pict_num].new_pict_type;
1304                 }
1305             }
1306
1307             if (s->avctx->b_frame_strategy == 0) {
1308                 b_frames = s->max_b_frames;
1309                 while (b_frames && !s->input_picture[b_frames])
1310                     b_frames--;
1311             } else if (s->avctx->b_frame_strategy == 1) {
1312                 for (i = 1; i < s->max_b_frames + 1; i++) {
1313                     if (s->input_picture[i] &&
1314                         s->input_picture[i]->b_frame_score == 0) {
1315                         s->input_picture[i]->b_frame_score =
1316                             get_intra_count(s,
1317                                             s->input_picture[i    ]->f.data[0],
1318                                             s->input_picture[i - 1]->f.data[0],
1319                                             s->linesize) + 1;
1320                     }
1321                 }
1322                 for (i = 0; i < s->max_b_frames + 1; i++) {
1323                     if (s->input_picture[i] == NULL ||
1324                         s->input_picture[i]->b_frame_score - 1 >
1325                             s->mb_num / s->avctx->b_sensitivity)
1326                         break;
1327                 }
1328
1329                 b_frames = FFMAX(0, i - 1);
1330
1331                 /* reset scores */
1332                 for (i = 0; i < b_frames + 1; i++) {
1333                     s->input_picture[i]->b_frame_score = 0;
1334                 }
1335             } else if (s->avctx->b_frame_strategy == 2) {
1336                 b_frames = estimate_best_b_count(s);
1337             } else {
1338                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1339                 b_frames = 0;
1340             }
1341
1342             emms_c();
1343             //static int b_count = 0;
1344             //b_count += b_frames;
1345             //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1346
1347             for (i = b_frames - 1; i >= 0; i--) {
1348                 int type = s->input_picture[i]->f.pict_type;
1349                 if (type && type != AV_PICTURE_TYPE_B)
1350                     b_frames = i;
1351             }
1352             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1353                 b_frames == s->max_b_frames) {
1354                 av_log(s->avctx, AV_LOG_ERROR,
1355                        "warning, too many b frames in a row\n");
1356             }
1357
1358             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1359                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1360                     s->gop_size > s->picture_in_gop_number) {
1361                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1362                 } else {
1363                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1364                         b_frames = 0;
1365                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1366                 }
1367             }
1368
1369             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1370                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1371                 b_frames--;
1372
1373             s->reordered_input_picture[0] = s->input_picture[b_frames];
1374             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1375                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1376             s->reordered_input_picture[0]->f.coded_picture_number =
1377                 s->coded_picture_number++;
1378             for (i = 0; i < b_frames; i++) {
1379                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1380                 s->reordered_input_picture[i + 1]->f.pict_type =
1381                     AV_PICTURE_TYPE_B;
1382                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1383                     s->coded_picture_number++;
1384             }
1385         }
1386     }
1387 no_output_pic:
1388     if (s->reordered_input_picture[0]) {
1389         s->reordered_input_picture[0]->f.reference =
1390            s->reordered_input_picture[0]->f.pict_type !=
1391                AV_PICTURE_TYPE_B ? 3 : 0;
1392
1393         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1394
1395         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1396             s->avctx->rc_buffer_size) {
1397             // input is a shared pix, so we can't modifiy it -> alloc a new
1398             // one & ensure that the shared one is reuseable
1399
1400             Picture *pic;
1401             int i = ff_find_unused_picture(s, 0);
1402             if (i < 0)
1403                 return i;
1404             pic = &s->picture[i];
1405
1406             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1407             if (ff_alloc_picture(s, pic, 0) < 0) {
1408                 return -1;
1409             }
1410
1411             /* mark us unused / free shared pic */
1412             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1413                 s->avctx->release_buffer(s->avctx,
1414                                          &s->reordered_input_picture[0]->f);
1415             for (i = 0; i < 4; i++)
1416                 s->reordered_input_picture[0]->f.data[i] = NULL;
1417             s->reordered_input_picture[0]->f.type = 0;
1418
1419             copy_picture_attributes(s, &pic->f,
1420                                     &s->reordered_input_picture[0]->f);
1421
1422             s->current_picture_ptr = pic;
1423         } else {
1424             // input is not a shared pix -> reuse buffer for current_pix
1425
1426             assert(s->reordered_input_picture[0]->f.type ==
1427                        FF_BUFFER_TYPE_USER ||
1428                    s->reordered_input_picture[0]->f.type ==
1429                        FF_BUFFER_TYPE_INTERNAL);
1430
1431             s->current_picture_ptr = s->reordered_input_picture[0];
1432             for (i = 0; i < 4; i++) {
1433                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1434             }
1435         }
1436         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1437
1438         s->picture_number = s->new_picture.f.display_picture_number;
1439         //printf("dpn:%d\n", s->picture_number);
1440     } else {
1441         memset(&s->new_picture, 0, sizeof(Picture));
1442     }
1443     return 0;
1444 }
1445
1446 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1447                           AVFrame *pic_arg, int *got_packet)
1448 {
1449     MpegEncContext *s = avctx->priv_data;
1450     int i, stuffing_count, ret;
1451     int context_count = s->slice_context_count;
1452
1453     s->picture_in_gop_number++;
1454
1455     if (load_input_picture(s, pic_arg) < 0)
1456         return -1;
1457
1458     if (select_input_picture(s) < 0) {
1459         return -1;
1460     }
1461
1462     /* output? */
1463     if (s->new_picture.f.data[0]) {
1464         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1465             return ret;
1466         if (s->mb_info) {
1467             s->mb_info_ptr = av_packet_new_side_data(pkt,
1468                                  AV_PKT_DATA_H263_MB_INFO,
1469                                  s->mb_width*s->mb_height*12);
1470             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1471         }
1472
1473         for (i = 0; i < context_count; i++) {
1474             int start_y = s->thread_context[i]->start_mb_y;
1475             int   end_y = s->thread_context[i]->  end_mb_y;
1476             int h       = s->mb_height;
1477             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1478             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1479
1480             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1481         }
1482
1483         s->pict_type = s->new_picture.f.pict_type;
1484         //emms_c();
1485         //printf("qs:%f %f %d\n", s->new_picture.quality,
1486         //       s->current_picture.quality, s->qscale);
1487         ff_MPV_frame_start(s, avctx);
1488 vbv_retry:
1489         if (encode_picture(s, s->picture_number) < 0)
1490             return -1;
1491
1492         avctx->header_bits = s->header_bits;
1493         avctx->mv_bits     = s->mv_bits;
1494         avctx->misc_bits   = s->misc_bits;
1495         avctx->i_tex_bits  = s->i_tex_bits;
1496         avctx->p_tex_bits  = s->p_tex_bits;
1497         avctx->i_count     = s->i_count;
1498         // FIXME f/b_count in avctx
1499         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1500         avctx->skip_count  = s->skip_count;
1501
1502         ff_MPV_frame_end(s);
1503
1504         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1505             ff_mjpeg_encode_picture_trailer(s);
1506
1507         if (avctx->rc_buffer_size) {
1508             RateControlContext *rcc = &s->rc_context;
1509             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1510
1511             if (put_bits_count(&s->pb) > max_size &&
1512                 s->lambda < s->avctx->lmax) {
1513                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1514                                        (s->qscale + 1) / s->qscale);
1515                 if (s->adaptive_quant) {
1516                     int i;
1517                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1518                         s->lambda_table[i] =
1519                             FFMAX(s->lambda_table[i] + 1,
1520                                   s->lambda_table[i] * (s->qscale + 1) /
1521                                   s->qscale);
1522                 }
1523                 s->mb_skipped = 0;        // done in MPV_frame_start()
1524                 // done in encode_picture() so we must undo it
1525                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1526                     if (s->flipflop_rounding          ||
1527                         s->codec_id == CODEC_ID_H263P ||
1528                         s->codec_id == CODEC_ID_MPEG4)
1529                         s->no_rounding ^= 1;
1530                 }
1531                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1532                     s->time_base       = s->last_time_base;
1533                     s->last_non_b_time = s->time - s->pp_time;
1534                 }
1535                 //av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
1536                 for (i = 0; i < context_count; i++) {
1537                     PutBitContext *pb = &s->thread_context[i]->pb;
1538                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1539                 }
1540                 goto vbv_retry;
1541             }
1542
1543             assert(s->avctx->rc_max_rate);
1544         }
1545
1546         if (s->flags & CODEC_FLAG_PASS1)
1547             ff_write_pass1_stats(s);
1548
1549         for (i = 0; i < 4; i++) {
1550             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1551             avctx->error[i] += s->current_picture_ptr->f.error[i];
1552         }
1553
1554         if (s->flags & CODEC_FLAG_PASS1)
1555             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1556                    avctx->i_tex_bits + avctx->p_tex_bits ==
1557                        put_bits_count(&s->pb));
1558         flush_put_bits(&s->pb);
1559         s->frame_bits  = put_bits_count(&s->pb);
1560
1561         stuffing_count = ff_vbv_update(s, s->frame_bits);
1562         if (stuffing_count) {
1563             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1564                     stuffing_count + 50) {
1565                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1566                 return -1;
1567             }
1568
1569             switch (s->codec_id) {
1570             case CODEC_ID_MPEG1VIDEO:
1571             case CODEC_ID_MPEG2VIDEO:
1572                 while (stuffing_count--) {
1573                     put_bits(&s->pb, 8, 0);
1574                 }
1575             break;
1576             case CODEC_ID_MPEG4:
1577                 put_bits(&s->pb, 16, 0);
1578                 put_bits(&s->pb, 16, 0x1C3);
1579                 stuffing_count -= 4;
1580                 while (stuffing_count--) {
1581                     put_bits(&s->pb, 8, 0xFF);
1582                 }
1583             break;
1584             default:
1585                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1586             }
1587             flush_put_bits(&s->pb);
1588             s->frame_bits  = put_bits_count(&s->pb);
1589         }
1590
1591         /* update mpeg1/2 vbv_delay for CBR */
1592         if (s->avctx->rc_max_rate                          &&
1593             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1594             s->out_format == FMT_MPEG1                     &&
1595             90000LL * (avctx->rc_buffer_size - 1) <=
1596                 s->avctx->rc_max_rate * 0xFFFFLL) {
1597             int vbv_delay, min_delay;
1598             double inbits  = s->avctx->rc_max_rate *
1599                              av_q2d(s->avctx->time_base);
1600             int    minbits = s->frame_bits - 8 *
1601                              (s->vbv_delay_ptr - s->pb.buf - 1);
1602             double bits    = s->rc_context.buffer_index + minbits - inbits;
1603
1604             if (bits < 0)
1605                 av_log(s->avctx, AV_LOG_ERROR,
1606                        "Internal error, negative bits\n");
1607
1608             assert(s->repeat_first_field == 0);
1609
1610             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1611             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1612                         s->avctx->rc_max_rate;
1613
1614             vbv_delay = FFMAX(vbv_delay, min_delay);
1615
1616             assert(vbv_delay < 0xFFFF);
1617
1618             s->vbv_delay_ptr[0] &= 0xF8;
1619             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1620             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1621             s->vbv_delay_ptr[2] &= 0x07;
1622             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1623             avctx->vbv_delay     = vbv_delay * 300;
1624         }
1625         s->total_bits     += s->frame_bits;
1626         avctx->frame_bits  = s->frame_bits;
1627
1628         pkt->pts = s->current_picture.f.pts;
1629         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1630             if (!s->current_picture.f.coded_picture_number)
1631                 pkt->dts = pkt->pts - s->dts_delta;
1632             else
1633                 pkt->dts = s->reordered_pts;
1634             s->reordered_pts = pkt->pts;
1635         } else
1636             pkt->dts = pkt->pts;
1637         if (s->current_picture.f.key_frame)
1638             pkt->flags |= AV_PKT_FLAG_KEY;
1639         if (s->mb_info)
1640             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1641     } else {
1642         assert((put_bits_ptr(&s->pb) == s->pb.buf));
1643         s->frame_bits = 0;
1644     }
1645     assert((s->frame_bits & 7) == 0);
1646
1647     pkt->size = s->frame_bits / 8;
1648     *got_packet = !!pkt->size;
1649     return 0;
1650 }
1651
1652 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1653                                                 int n, int threshold)
1654 {
1655     static const char tab[64] = {
1656         3, 2, 2, 1, 1, 1, 1, 1,
1657         1, 1, 1, 1, 1, 1, 1, 1,
1658         1, 1, 1, 1, 1, 1, 1, 1,
1659         0, 0, 0, 0, 0, 0, 0, 0,
1660         0, 0, 0, 0, 0, 0, 0, 0,
1661         0, 0, 0, 0, 0, 0, 0, 0,
1662         0, 0, 0, 0, 0, 0, 0, 0,
1663         0, 0, 0, 0, 0, 0, 0, 0
1664     };
1665     int score = 0;
1666     int run = 0;
1667     int i;
1668     DCTELEM *block = s->block[n];
1669     const int last_index = s->block_last_index[n];
1670     int skip_dc;
1671
1672     if (threshold < 0) {
1673         skip_dc = 0;
1674         threshold = -threshold;
1675     } else
1676         skip_dc = 1;
1677
1678     /* Are all we could set to zero already zero? */
1679     if (last_index <= skip_dc - 1)
1680         return;
1681
1682     for (i = 0; i <= last_index; i++) {
1683         const int j = s->intra_scantable.permutated[i];
1684         const int level = FFABS(block[j]);
1685         if (level == 1) {
1686             if (skip_dc && i == 0)
1687                 continue;
1688             score += tab[run];
1689             run = 0;
1690         } else if (level > 1) {
1691             return;
1692         } else {
1693             run++;
1694         }
1695     }
1696     if (score >= threshold)
1697         return;
1698     for (i = skip_dc; i <= last_index; i++) {
1699         const int j = s->intra_scantable.permutated[i];
1700         block[j] = 0;
1701     }
1702     if (block[0])
1703         s->block_last_index[n] = 0;
1704     else
1705         s->block_last_index[n] = -1;
1706 }
1707
1708 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1709                                int last_index)
1710 {
1711     int i;
1712     const int maxlevel = s->max_qcoeff;
1713     const int minlevel = s->min_qcoeff;
1714     int overflow = 0;
1715
1716     if (s->mb_intra) {
1717         i = 1; // skip clipping of intra dc
1718     } else
1719         i = 0;
1720
1721     for (; i <= last_index; i++) {
1722         const int j = s->intra_scantable.permutated[i];
1723         int level = block[j];
1724
1725         if (level > maxlevel) {
1726             level = maxlevel;
1727             overflow++;
1728         } else if (level < minlevel) {
1729             level = minlevel;
1730             overflow++;
1731         }
1732
1733         block[j] = level;
1734     }
1735
1736     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1737         av_log(s->avctx, AV_LOG_INFO,
1738                "warning, clipping %d dct coefficients to %d..%d\n",
1739                overflow, minlevel, maxlevel);
1740 }
1741
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For each pixel the neighbourhood of up to 3x3 samples (cut off at the block
 * border) is examined; the weight is 36 * sqrt(N * sum(v^2) - sum(v)^2) / N,
 * where N is the number of samples in that neighbourhood.
 *
 * @param weight output table of 64 entries, row-major
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two pixel rows
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left    = FFMAX(col - 1, 0);
            const int right   = FFMIN(8, col + 2);
            const int samples = (bottom - top) * (right - left);
            int total   = 0;
            int squares = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = line[nx];

                    total   += v;
                    squares += v * v;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * squares - total * total)) / samples;
        }
    }
}
1765
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * The function fills s->block[] either with the source pixels (intra) or with
 * the difference between the source and the motion compensated prediction
 * written to s->dest[] (inter), runs s->dct_quantize on every block that is
 * not skipped, optionally refines / eliminates coefficients and finally calls
 * the codec specific macroblock writer selected by s->codec_id.
 *
 * @param s               encoder context
 * @param motion_x        passed through to the codec specific macroblock writer
 * @param motion_y        passed through to the codec specific macroblock writer
 * @param mb_block_height number of chroma rows per macroblock; encode_mb()
 *                        passes 8 for CHROMA_420 and 16 otherwise
 * @param mb_block_count  number of 8x8 blocks in the macroblock; encode_mb()
 *                        passes 6 for CHROMA_420 and 8 otherwise
 */
1766 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1767                                                 int motion_x, int motion_y,
1768                                                 int mb_block_height,
1769                                                 int mb_block_count)
1770 {
1771     int16_t weight[8][64];
1772     DCTELEM orig[8][64];
1773     const int mb_x = s->mb_x;
1774     const int mb_y = s->mb_y;
1775     int i;
1776     int skip_dct[8];
1777     int dct_offset = s->linesize * 8; // default for progressive frames
1778     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1779     int wrap_y, wrap_c;
1780
         /* every block starts out with the context-wide skip setting */
1781     for (i = 0; i < mb_block_count; i++)
1782         skip_dct[i] = s->skipdct;
1783
         /* adaptive quantization: take the per-macroblock lambda and derive
          * the quantizer change (dquant) relative to the previous qscale */
1784     if (s->adaptive_quant) {
1785         const int last_qp = s->qscale;
1786         const int mb_xy = mb_x + mb_y * s->mb_stride;
1787
1788         s->lambda = s->lambda_table[mb_xy];
1789         update_qscale(s);
1790
1791         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1792             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1793             s->dquant = s->qscale - last_qp;
1794
1795             if (s->out_format == FMT_H263) {
                     /* H.263 family output: dquant is limited to -2..2 */
1796                 s->dquant = av_clip(s->dquant, -2, 2);
1797
1798                 if (s->codec_id == CODEC_ID_MPEG4) {
1799                     if (!s->mb_intra) {
                             /* B-frames: odd dquant values and direct mode
                              * macroblocks get no quantizer change */
1800                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1801                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1802                                 s->dquant = 0;
1803                         }
                             /* no quantizer change for 8x8 motion vectors */
1804                         if (s->mv_type == MV_TYPE_8X8)
1805                             s->dquant = 0;
1806                     }
1807                 }
1808             }
1809         }
1810         ff_set_qscale(s, last_qp + s->dquant);
1811     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1812         ff_set_qscale(s, s->qscale + s->dquant);
1813
         /* locate the macroblock inside the picture to be encoded */
1814     wrap_y = s->linesize;
1815     wrap_c = s->uvlinesize;
1816     ptr_y  = s->new_picture.f.data[0] +
1817              (mb_y * 16 * wrap_y)              + mb_x * 16;
1818     ptr_cb = s->new_picture.f.data[1] +
1819              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1820     ptr_cr = s->new_picture.f.data[2] +
1821              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1822
         /* the macroblock reaches beyond the picture (and the codec is not
          * AMV): read the source through the edge emulation buffer instead */
         /* NOTE(review): the chroma calls pass mb_y * 8 and
          * s->width >> 1 / s->height >> 1 even when mb_block_height is 16;
          * this looks 4:2:0 specific - confirm behaviour for 4:2:2 input */
1823     if((mb_x*16+16 > s->width || mb_y*16+16 > s->height) && s->codec_id != CODEC_ID_AMV){
1824         uint8_t *ebuf = s->edge_emu_buffer + 32;
1825         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1826                                 mb_y * 16, s->width, s->height);
1827         ptr_y = ebuf;
1828         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1829                                 mb_block_height, mb_x * 8, mb_y * 8,
1830                                 s->width >> 1, s->height >> 1);
1831         ptr_cb = ebuf + 18 * wrap_y;
1832         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1833                                 mb_block_height, mb_x * 8, mb_y * 8,
1834                                 s->width >> 1, s->height >> 1);
1835         ptr_cr = ebuf + 18 * wrap_y + 8;
1836     }
1837
1838     if (s->mb_intra) {
             /* intra: the blocks are filled with the source pixels */
1839         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1840             int progressive_score, interlaced_score;
1841
                 /* compare the cost of frame (progressive) and field
                  * (interlaced) line ordering; progressive gets a bonus
                  * of 400 */
1842             s->interlaced_dct = 0;
1843             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1844                                                     NULL, wrap_y, 8) +
1845                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1846                                                     NULL, wrap_y, 8) - 400;
1847
1848             if (progressive_score > 0) {
1849                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1850                                                        NULL, wrap_y * 2, 8) +
1851                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1852                                                        NULL, wrap_y * 2, 8);
1853                 if (progressive_score > interlaced_score) {
1854                     s->interlaced_dct = 1;
1855
                         /* field mode: the lower blocks start one line down
                          * and every block reads every second line */
1856                     dct_offset = wrap_y;
1857                     wrap_y <<= 1;
1858                     if (s->chroma_format == CHROMA_422)
1859                         wrap_c <<= 1;
1860                 }
1861             }
1862         }
1863
1864         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1865         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1866         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1867         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1868
1869         if (s->flags & CODEC_FLAG_GRAY) {
1870             skip_dct[4] = 1;
1871             skip_dct[5] = 1;
1872         } else {
1873             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1874             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1875             if (!s->chroma_y_shift) { /* 422 */
1876                 s->dsp.get_pixels(s->block[6],
1877                                   ptr_cb + (dct_offset >> 1), wrap_c);
1878                 s->dsp.get_pixels(s->block[7],
1879                                   ptr_cr + (dct_offset >> 1), wrap_c);
1880             }
1881         }
1882     } else {
             /* inter: build the prediction in s->dest[] and encode the
              * difference between the source and that prediction */
1883         op_pixels_func (*op_pix)[4];
1884         qpel_mc_func (*op_qpix)[16];
1885         uint8_t *dest_y, *dest_cb, *dest_cr;
1886
1887         dest_y  = s->dest[0];
1888         dest_cb = s->dest[1];
1889         dest_cr = s->dest[2];
1890
1891         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1892             op_pix  = s->dsp.put_pixels_tab;
1893             op_qpix = s->dsp.put_qpel_pixels_tab;
1894         } else {
1895             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1896             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1897         }
1898
             /* after a forward prediction the operators are switched to the
              * averaging variants, so a following backward prediction is
              * averaged with it */
1899         if (s->mv_dir & MV_DIR_FORWARD) {
1900             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f.data,
1901                        op_pix, op_qpix);
1902             op_pix  = s->dsp.avg_pixels_tab;
1903             op_qpix = s->dsp.avg_qpel_pixels_tab;
1904         }
1905         if (s->mv_dir & MV_DIR_BACKWARD) {
1906             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f.data,
1907                        op_pix, op_qpix);
1908         }
1909
1910         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1911             int progressive_score, interlaced_score;
1912
                 /* same frame/field decision as in the intra case, but
                  * measured between the prediction and the source */
1913             s->interlaced_dct = 0;
1914             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1915                                                     ptr_y,              wrap_y,
1916                                                     8) +
1917                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1918                                                     ptr_y + wrap_y * 8, wrap_y,
1919                                                     8) - 400;
1920
1921             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1922                 progressive_score -= 400;
1923
1924             if (progressive_score > 0) {
1925                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1926                                                        ptr_y,
1927                                                        wrap_y * 2, 8) +
1928                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1929                                                        ptr_y + wrap_y,
1930                                                        wrap_y * 2, 8);
1931
1932                 if (progressive_score > interlaced_score) {
1933                     s->interlaced_dct = 1;
1934
1935                     dct_offset = wrap_y;
1936                     wrap_y <<= 1;
1937                     if (s->chroma_format == CHROMA_422)
1938                         wrap_c <<= 1;
1939                 }
1940             }
1941         }
1942
1943         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1944         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1945         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1946                            dest_y + dct_offset, wrap_y);
1947         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1948                            dest_y + dct_offset + 8, wrap_y);
1949
1950         if (s->flags & CODEC_FLAG_GRAY) {
1951             skip_dct[4] = 1;
1952             skip_dct[5] = 1;
1953         } else {
1954             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1955             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1956             if (!s->chroma_y_shift) { /* 422 */
1957                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1958                                    dest_cb + (dct_offset >> 1), wrap_c);
1959                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1960                                    dest_cr + (dct_offset >> 1), wrap_c);
1961             }
1962         }
1963         /* pre quantization */
             /* when the stored motion compensated variance of this
              * macroblock is below 2 * qscale^2, skip the DCT of every
              * block whose SAD against the prediction is below 20 * qscale */
1964         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1965                 2 * s->qscale * s->qscale) {
1966             // FIXME optimize
1967             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1968                               wrap_y, 8) < 20 * s->qscale)
1969                 skip_dct[0] = 1;
1970             if (s->dsp.sad[1](NULL, ptr_y + 8,
1971                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1972                 skip_dct[1] = 1;
1973             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1974                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1975                 skip_dct[2] = 1;
1976             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1977                               dest_y + dct_offset + 8,
1978                               wrap_y, 8) < 20 * s->qscale)
1979                 skip_dct[3] = 1;
1980             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1981                               wrap_c, 8) < 20 * s->qscale)
1982                 skip_dct[4] = 1;
1983             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1984                               wrap_c, 8) < 20 * s->qscale)
1985                 skip_dct[5] = 1;
1986             if (!s->chroma_y_shift) { /* 422 */
1987                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1988                                   dest_cb + (dct_offset >> 1),
1989                                   wrap_c, 8) < 20 * s->qscale)
1990                     skip_dct[6] = 1;
1991                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1992                                   dest_cr + (dct_offset >> 1),
1993                                   wrap_c, 8) < 20 * s->qscale)
1994                     skip_dct[7] = 1;
1995             }
1996         }
1997     }
1998
         /* noise shaping needs per-coefficient weights computed from the
          * source pixels and a copy of the blocks as they are before
          * s->dct_quantize() is run on them */
1999     if (s->quantizer_noise_shaping) {
2000         if (!skip_dct[0])
2001             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2002         if (!skip_dct[1])
2003             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2004         if (!skip_dct[2])
2005             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2006         if (!skip_dct[3])
2007             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2008         if (!skip_dct[4])
2009             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2010         if (!skip_dct[5])
2011             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2012         if (!s->chroma_y_shift) { /* 422 */
2013             if (!skip_dct[6])
2014                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2015                                   wrap_c);
2016             if (!skip_dct[7])
2017                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2018                                   wrap_c);
2019         }
2020         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
2021     }
2022
2023     /* DCT & quantize */
2024     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2025     {
2026         for (i = 0; i < mb_block_count; i++) {
2027             if (!skip_dct[i]) {
2028                 int overflow;
2029                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2030                 // FIXME we could decide to change to quantizer instead of
2031                 // clipping
2032                 // JS: I don't think that would be a good idea it could lower
2033                 //     quality instead of improve it. Just INTRADC clipping
2034                 //     deserves changes in quantizer
2035                 if (overflow)
2036                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2037             } else
2038                 s->block_last_index[i] = -1;
2039         }
2040         if (s->quantizer_noise_shaping) {
2041             for (i = 0; i < mb_block_count; i++) {
2042                 if (!skip_dct[i]) {
2043                     s->block_last_index[i] =
2044                         dct_quantize_refine(s, s->block[i], weight[i],
2045                                             orig[i], i, s->qscale);
2046                 }
2047             }
2048         }
2049
             /* single coefficient elimination is only applied to inter
              * macroblocks */
2050         if (s->luma_elim_threshold && !s->mb_intra)
2051             for (i = 0; i < 4; i++)
2052                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2053         if (s->chroma_elim_threshold && !s->mb_intra)
2054             for (i = 4; i < mb_block_count; i++)
2055                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2056
             /* with CBP_RD, blocks without coefficients get a large
              * coded_score */
2057         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2058             for (i = 0; i < mb_block_count; i++) {
2059                 if (s->block_last_index[i] == -1)
2060                     s->coded_score[i] = INT_MAX / 256;
2061             }
2062         }
2063     }
2064
         /* gray intra macroblocks: chroma blocks 4 and 5 carry only a DC
          * coefficient, set to 1024 / c_dc_scale rounded to nearest
          * (presumably the neutral chroma level - confirm) */
2065     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2066         s->block_last_index[4] =
2067         s->block_last_index[5] = 0;
2068         s->block[4][0] =
2069         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2070     }
2071
2072     // non c quantize code returns incorrect block_last_index FIXME
         /* recompute the last non-zero position by scanning backwards */
2073     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2074         for (i = 0; i < mb_block_count; i++) {
2075             int j;
2076             if (s->block_last_index[i] > 0) {
2077                 for (j = 63; j > 0; j--) {
2078                     if (s->block[i][s->intra_scantable.permutated[j]])
2079                         break;
2080                 }
2081                 s->block_last_index[i] = j;
2082             }
2083         }
2084     }
2085
2086     /* huffman encode */
         /* each call is guarded by a CONFIG_*_ENCODER build option */
2087     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2088     case CODEC_ID_MPEG1VIDEO:
2089     case CODEC_ID_MPEG2VIDEO:
2090         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2091             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2092         break;
2093     case CODEC_ID_MPEG4:
2094         if (CONFIG_MPEG4_ENCODER)
2095             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2096         break;
2097     case CODEC_ID_MSMPEG4V2:
2098     case CODEC_ID_MSMPEG4V3:
2099     case CODEC_ID_WMV1:
2100         if (CONFIG_MSMPEG4_ENCODER)
2101             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2102         break;
2103     case CODEC_ID_WMV2:
2104         if (CONFIG_WMV2_ENCODER)
2105             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2106         break;
2107     case CODEC_ID_H261:
2108         if (CONFIG_H261_ENCODER)
2109             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2110         break;
2111     case CODEC_ID_H263:
2112     case CODEC_ID_H263P:
2113     case CODEC_ID_FLV1:
2114     case CODEC_ID_RV10:
2115     case CODEC_ID_RV20:
2116         if (CONFIG_H263_ENCODER)
2117             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2118         break;
2119     case CODEC_ID_MJPEG:
2120     case CODEC_ID_AMV:
2121         if (CONFIG_MJPEG_ENCODER)
2122             ff_mjpeg_encode_mb(s, s->block);
2123         break;
2124     default:
2125         assert(0);
2126     }
2127 }
2128
2129 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2130 {
2131     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2132     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2133 }
2134
2135 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2136     int i;
2137
2138     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2139
2140     /* mpeg1 */
2141     d->mb_skip_run= s->mb_skip_run;
2142     for(i=0; i<3; i++)
2143         d->last_dc[i] = s->last_dc[i];
2144
2145     /* statistics */
2146     d->mv_bits= s->mv_bits;
2147     d->i_tex_bits= s->i_tex_bits;
2148     d->p_tex_bits= s->p_tex_bits;
2149     d->i_count= s->i_count;
2150     d->f_count= s->f_count;
2151     d->b_count= s->b_count;
2152     d->skip_count= s->skip_count;
2153     d->misc_bits= s->misc_bits;
2154     d->last_bits= 0;
2155
2156     d->mb_skipped= 0;
2157     d->qscale= s->qscale;
2158     d->dquant= s->dquant;
2159
2160     d->esc3_level_length= s->esc3_level_length;
2161 }
2162
2163 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2164     int i;
2165
2166     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2167     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2168
2169     /* mpeg1 */
2170     d->mb_skip_run= s->mb_skip_run;
2171     for(i=0; i<3; i++)
2172         d->last_dc[i] = s->last_dc[i];
2173
2174     /* statistics */
2175     d->mv_bits= s->mv_bits;
2176     d->i_tex_bits= s->i_tex_bits;
2177     d->p_tex_bits= s->p_tex_bits;
2178     d->i_count= s->i_count;
2179     d->f_count= s->f_count;
2180     d->b_count= s->b_count;
2181     d->skip_count= s->skip_count;
2182     d->misc_bits= s->misc_bits;
2183
2184     d->mb_intra= s->mb_intra;
2185     d->mb_skipped= s->mb_skipped;
2186     d->mv_type= s->mv_type;
2187     d->mv_dir= s->mv_dir;
2188     d->pb= s->pb;
2189     if(s->data_partitioning){
2190         d->pb2= s->pb2;
2191         d->tex_pb= s->tex_pb;
2192     }
2193     d->block= s->block;
2194     for(i=0; i<8; i++)
2195         d->block_last_index[i]= s->block_last_index[i];
2196     d->interlaced_dct= s->interlaced_dct;
2197     d->qscale= s->qscale;
2198
2199     d->esc3_level_length= s->esc3_level_length;
2200 }
2201
2202 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2203                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2204                            int *dmin, int *next_block, int motion_x, int motion_y)
2205 {
2206     int score;
2207     uint8_t *dest_backup[3];
2208
2209     copy_context_before_encode(s, backup, type);
2210
2211     s->block= s->blocks[*next_block];
2212     s->pb= pb[*next_block];
2213     if(s->data_partitioning){
2214         s->pb2   = pb2   [*next_block];
2215         s->tex_pb= tex_pb[*next_block];
2216     }
2217
2218     if(*next_block){
2219         memcpy(dest_backup, s->dest, sizeof(s->dest));
2220         s->dest[0] = s->rd_scratchpad;
2221         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2222         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2223         assert(s->linesize >= 32); //FIXME
2224     }
2225
2226     encode_mb(s, motion_x, motion_y);
2227
2228     score= put_bits_count(&s->pb);
2229     if(s->data_partitioning){
2230         score+= put_bits_count(&s->pb2);
2231         score+= put_bits_count(&s->tex_pb);
2232     }
2233
2234     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2235         ff_MPV_decode_mb(s, s->block);
2236
2237         score *= s->lambda2;
2238         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2239     }
2240
2241     if(*next_block){
2242         memcpy(s->dest, dest_backup, sizeof(s->dest));
2243     }
2244
2245     if(score<*dmin){
2246         *dmin= score;
2247         *next_block^=1;
2248
2249         copy_context_after_encode(best, s, type);
2250     }
2251 }
2252
2253 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2254     uint32_t *sq = ff_squareTbl + 256;
2255     int acc=0;
2256     int x,y;
2257
2258     if(w==16 && h==16)
2259         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2260     else if(w==8 && h==8)
2261         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2262
2263     for(y=0; y<h; y++){
2264         for(x=0; x<w; x++){
2265             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2266         }
2267     }
2268
2269     assert(acc>=0);
2270
2271     return acc;
2272 }
2273
2274 static int sse_mb(MpegEncContext *s){
2275     int w= 16;
2276     int h= 16;
2277
2278     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2279     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2280
2281     if(w==16 && h==16)
2282       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2283         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2284                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2285                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2286       }else{
2287         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2288                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2289                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2290       }
2291     else
2292         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2293                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2294                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2295 }
2296
2297 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2298     MpegEncContext *s= *(void**)arg;
2299
2300
2301     s->me.pre_pass=1;
2302     s->me.dia_size= s->avctx->pre_dia_size;
2303     s->first_slice_line=1;
2304     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2305         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2306             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2307         }
2308         s->first_slice_line=0;
2309     }
2310
2311     s->me.pre_pass=0;
2312
2313     return 0;
2314 }
2315
2316 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2317     MpegEncContext *s= *(void**)arg;
2318
2319     ff_check_alignment();
2320
2321     s->me.dia_size= s->avctx->dia_size;
2322     s->first_slice_line=1;
2323     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2324         s->mb_x=0; //for block init below
2325         ff_init_block_index(s);
2326         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2327             s->block_index[0]+=2;
2328             s->block_index[1]+=2;
2329             s->block_index[2]+=2;
2330             s->block_index[3]+=2;
2331
2332             /* compute motion vector & mb_type and store in context */
2333             if(s->pict_type==AV_PICTURE_TYPE_B)
2334                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2335             else
2336                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2337         }
2338         s->first_slice_line=0;
2339     }
2340     return 0;
2341 }
2342
2343 static int mb_var_thread(AVCodecContext *c, void *arg){
2344     MpegEncContext *s= *(void**)arg;
2345     int mb_x, mb_y;
2346
2347     ff_check_alignment();
2348
2349     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2350         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2351             int xx = mb_x * 16;
2352             int yy = mb_y * 16;
2353             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2354             int varc;
2355             int sum = s->dsp.pix_sum(pix, s->linesize);
2356
2357             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2358
2359             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2360             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2361             s->me.mb_var_sum_temp    += varc;
2362         }
2363     }
2364     return 0;
2365 }
2366
2367 static void write_slice_end(MpegEncContext *s){
2368     if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4){
2369         if(s->partitioned_frame){
2370             ff_mpeg4_merge_partitions(s);
2371         }
2372
2373         ff_mpeg4_stuffing(&s->pb);
2374     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2375         ff_mjpeg_encode_stuffing(&s->pb);
2376     }
2377
2378     avpriv_align_put_bits(&s->pb);
2379     flush_put_bits(&s->pb);
2380
2381     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2382         s->misc_bits+= get_bits_diff(s);
2383 }
2384
2385 static void write_mb_info(MpegEncContext *s)
2386 {
2387     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2388     int offset = put_bits_count(&s->pb);
2389     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2390     int gobn = s->mb_y / s->gob_index;
2391     int pred_x, pred_y;
2392     if (CONFIG_H263_ENCODER)
2393         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2394     bytestream_put_le32(&ptr, offset);
2395     bytestream_put_byte(&ptr, s->qscale);
2396     bytestream_put_byte(&ptr, gobn);
2397     bytestream_put_le16(&ptr, mba);
2398     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2399     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2400     /* 4MV not implemented */
2401     bytestream_put_byte(&ptr, 0); /* hmv2 */
2402     bytestream_put_byte(&ptr, 0); /* vmv2 */
2403 }
2404
2405 static void update_mb_info(MpegEncContext *s, int startcode)
2406 {
2407     if (!s->mb_info)
2408         return;
2409     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2410         s->mb_info_size += 12;
2411         s->prev_mb_info = s->last_mb_info;
2412     }
2413     if (startcode) {
2414         s->prev_mb_info = put_bits_count(&s->pb)/8;
2415         /* This might have incremented mb_info_size above, and we return without
2416          * actually writing any info into that slot yet. But in that case,
2417          * this will be called again at the start of the after writing the
2418          * start code, actually writing the mb info. */
2419         return;
2420     }
2421
2422     s->last_mb_info = put_bits_count(&s->pb)/8;
2423     if (!s->mb_info_size)
2424         s->mb_info_size += 12;
2425     write_mb_info(s);
2426 }
2427
2428 static int encode_thread(AVCodecContext *c, void *arg){
2429     MpegEncContext *s= *(void**)arg;
2430     int mb_x, mb_y, pdif = 0;
2431     int chr_h= 16>>s->chroma_y_shift;
2432     int i, j;
2433     MpegEncContext best_s, backup_s;
2434     uint8_t bit_buf[2][MAX_MB_BYTES];
2435     uint8_t bit_buf2[2][MAX_MB_BYTES];
2436     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2437     PutBitContext pb[2], pb2[2], tex_pb[2];
2438 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2439
2440     ff_check_alignment();
2441
2442     for(i=0; i<2; i++){
2443         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2444         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2445         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2446     }
2447
2448     s->last_bits= put_bits_count(&s->pb);
2449     s->mv_bits=0;
2450     s->misc_bits=0;
2451     s->i_tex_bits=0;
2452     s->p_tex_bits=0;
2453     s->i_count=0;
2454     s->f_count=0;
2455     s->b_count=0;
2456     s->skip_count=0;
2457
2458     for(i=0; i<3; i++){
2459         /* init last dc values */
2460         /* note: quant matrix value (8) is implied here */
2461         s->last_dc[i] = 128 << s->intra_dc_precision;
2462
2463         s->current_picture.f.error[i] = 0;
2464     }
2465     if(s->codec_id==CODEC_ID_AMV){
2466         s->last_dc[0] = 128*8/13;
2467         s->last_dc[1] = 128*8/14;
2468         s->last_dc[2] = 128*8/14;
2469     }
2470     s->mb_skip_run = 0;
2471     memset(s->last_mv, 0, sizeof(s->last_mv));
2472
2473     s->last_mv_dir = 0;
2474
2475     switch(s->codec_id){
2476     case CODEC_ID_H263:
2477     case CODEC_ID_H263P:
2478     case CODEC_ID_FLV1:
2479         if (CONFIG_H263_ENCODER)
2480             s->gob_index = ff_h263_get_gob_height(s);
2481         break;
2482     case CODEC_ID_MPEG4:
2483         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2484             ff_mpeg4_init_partitions(s);
2485         break;
2486     }
2487
2488     s->resync_mb_x=0;
2489     s->resync_mb_y=0;
2490     s->first_slice_line = 1;
2491     s->ptr_lastgob = s->pb.buf;
2492     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2493 //    printf("row %d at %X\n", s->mb_y, (int)s);
2494         s->mb_x=0;
2495         s->mb_y= mb_y;
2496
2497         ff_set_qscale(s, s->qscale);
2498         ff_init_block_index(s);
2499
2500         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2501             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2502             int mb_type= s->mb_type[xy];
2503 //            int d;
2504             int dmin= INT_MAX;
2505             int dir;
2506
2507             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2508                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2509                 return -1;
2510             }
2511             if(s->data_partitioning){
2512                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2513                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2514                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2515                     return -1;
2516                 }
2517             }
2518
2519             s->mb_x = mb_x;
2520             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2521             ff_update_block_index(s);
2522
2523             if(CONFIG_H261_ENCODER && s->codec_id == CODEC_ID_H261){
2524                 ff_h261_reorder_mb_index(s);
2525                 xy= s->mb_y*s->mb_stride + s->mb_x;
2526                 mb_type= s->mb_type[xy];
2527             }
2528
2529             /* write gob / video packet header  */
2530             if(s->rtp_mode){
2531                 int current_packet_size, is_gob_start;
2532
2533                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2534
2535                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2536
2537                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2538
2539                 switch(s->codec_id){
2540                 case CODEC_ID_H263:
2541                 case CODEC_ID_H263P:
2542                     if(!s->h263_slice_structured)
2543                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2544                     break;
2545                 case CODEC_ID_MPEG2VIDEO:
2546                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2547                 case CODEC_ID_MPEG1VIDEO:
2548                     if(s->mb_skip_run) is_gob_start=0;
2549                     break;
2550                 }
2551
2552                 if(is_gob_start){
2553                     if(s->start_mb_y != mb_y || mb_x!=0){
2554                         write_slice_end(s);
2555
2556                         if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
2557                             ff_mpeg4_init_partitions(s);
2558                         }
2559                     }
2560
2561                     assert((put_bits_count(&s->pb)&7) == 0);
2562                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2563
2564                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2565                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2566                         int d= 100 / s->avctx->error_rate;
2567                         if(r % d == 0){
2568                             current_packet_size=0;
2569                             s->pb.buf_ptr= s->ptr_lastgob;
2570                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2571                         }
2572                     }
2573
2574                     if (s->avctx->rtp_callback){
2575                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2576                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2577                     }
2578                     update_mb_info(s, 1);
2579
2580                     switch(s->codec_id){
2581                     case CODEC_ID_MPEG4:
2582                         if (CONFIG_MPEG4_ENCODER) {
2583                             ff_mpeg4_encode_video_packet_header(s);
2584                             ff_mpeg4_clean_buffers(s);
2585                         }
2586                     break;
2587                     case CODEC_ID_MPEG1VIDEO:
2588                     case CODEC_ID_MPEG2VIDEO:
2589                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2590                             ff_mpeg1_encode_slice_header(s);
2591                             ff_mpeg1_clean_buffers(s);
2592                         }
2593                     break;
2594                     case CODEC_ID_H263:
2595                     case CODEC_ID_H263P:
2596                         if (CONFIG_H263_ENCODER)
2597                             ff_h263_encode_gob_header(s, mb_y);
2598                     break;
2599                     }
2600
2601                     if(s->flags&CODEC_FLAG_PASS1){
2602                         int bits= put_bits_count(&s->pb);
2603                         s->misc_bits+= bits - s->last_bits;
2604                         s->last_bits= bits;
2605                     }
2606
2607                     s->ptr_lastgob += current_packet_size;
2608                     s->first_slice_line=1;
2609                     s->resync_mb_x=mb_x;
2610                     s->resync_mb_y=mb_y;
2611                 }
2612             }
2613
2614             if(  (s->resync_mb_x   == s->mb_x)
2615                && s->resync_mb_y+1 == s->mb_y){
2616                 s->first_slice_line=0;
2617             }
2618
2619             s->mb_skipped=0;
2620             s->dquant=0; //only for QP_RD
2621
2622             update_mb_info(s, 0);
2623
2624             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2625                 int next_block=0;
2626                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2627
2628                 copy_context_before_encode(&backup_s, s, -1);
2629                 backup_s.pb= s->pb;
2630                 best_s.data_partitioning= s->data_partitioning;
2631                 best_s.partitioned_frame= s->partitioned_frame;
2632                 if(s->data_partitioning){
2633                     backup_s.pb2= s->pb2;
2634                     backup_s.tex_pb= s->tex_pb;
2635                 }
2636
2637                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2638                     s->mv_dir = MV_DIR_FORWARD;
2639                     s->mv_type = MV_TYPE_16X16;
2640                     s->mb_intra= 0;
2641                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2642                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2643                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2644                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2645                 }
2646                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2647                     s->mv_dir = MV_DIR_FORWARD;
2648                     s->mv_type = MV_TYPE_FIELD;
2649                     s->mb_intra= 0;
2650                     for(i=0; i<2; i++){
2651                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2652                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2653                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2654                     }
2655                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2656                                  &dmin, &next_block, 0, 0);
2657                 }
2658                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2659                     s->mv_dir = MV_DIR_FORWARD;
2660                     s->mv_type = MV_TYPE_16X16;
2661                     s->mb_intra= 0;
2662                     s->mv[0][0][0] = 0;
2663                     s->mv[0][0][1] = 0;
2664                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2665                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2666                 }
2667                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2668                     s->mv_dir = MV_DIR_FORWARD;
2669                     s->mv_type = MV_TYPE_8X8;
2670                     s->mb_intra= 0;
2671                     for(i=0; i<4; i++){
2672                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2673                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2674                     }
2675                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2676                                  &dmin, &next_block, 0, 0);
2677                 }
2678                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2679                     s->mv_dir = MV_DIR_FORWARD;
2680                     s->mv_type = MV_TYPE_16X16;
2681                     s->mb_intra= 0;
2682                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2683                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2684                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2685                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2686                 }
2687                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2688                     s->mv_dir = MV_DIR_BACKWARD;
2689                     s->mv_type = MV_TYPE_16X16;
2690                     s->mb_intra= 0;
2691                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2692                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2693                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2694                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2695                 }
2696                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2697                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2698                     s->mv_type = MV_TYPE_16X16;
2699                     s->mb_intra= 0;
2700                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2701                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2702                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2703                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2704                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2705                                  &dmin, &next_block, 0, 0);
2706                 }
2707                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2708                     s->mv_dir = MV_DIR_FORWARD;
2709                     s->mv_type = MV_TYPE_FIELD;
2710                     s->mb_intra= 0;
2711                     for(i=0; i<2; i++){
2712                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2713                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2714                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2715                     }
2716                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2717                                  &dmin, &next_block, 0, 0);
2718                 }
2719                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2720                     s->mv_dir = MV_DIR_BACKWARD;
2721                     s->mv_type = MV_TYPE_FIELD;
2722                     s->mb_intra= 0;
2723                     for(i=0; i<2; i++){
2724                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2725                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2726                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2727                     }
2728                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2729                                  &dmin, &next_block, 0, 0);
2730                 }
2731                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2732                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2733                     s->mv_type = MV_TYPE_FIELD;
2734                     s->mb_intra= 0;
2735                     for(dir=0; dir<2; dir++){
2736                         for(i=0; i<2; i++){
2737                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2738                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2739                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2740                         }
2741                     }
2742                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2743                                  &dmin, &next_block, 0, 0);
2744                 }
2745                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2746                     s->mv_dir = 0;
2747                     s->mv_type = MV_TYPE_16X16;
2748                     s->mb_intra= 1;
2749                     s->mv[0][0][0] = 0;
2750                     s->mv[0][0][1] = 0;
2751                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2752                                  &dmin, &next_block, 0, 0);
2753                     if(s->h263_pred || s->h263_aic){
2754                         if(best_s.mb_intra)
2755                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2756                         else
2757                             ff_clean_intra_table_entries(s); //old mode?
2758                     }
2759                 }
2760
2761                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2762                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2763                         const int last_qp= backup_s.qscale;
2764                         int qpi, qp, dc[6];
2765                         DCTELEM ac[6][16];
2766                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2767                         static const int dquant_tab[4]={-1,1,-2,2};
2768
2769                         assert(backup_s.dquant == 0);
2770
2771                         //FIXME intra
2772                         s->mv_dir= best_s.mv_dir;
2773                         s->mv_type = MV_TYPE_16X16;
2774                         s->mb_intra= best_s.mb_intra;
2775                         s->mv[0][0][0] = best_s.mv[0][0][0];
2776                         s->mv[0][0][1] = best_s.mv[0][0][1];
2777                         s->mv[1][0][0] = best_s.mv[1][0][0];
2778                         s->mv[1][0][1] = best_s.mv[1][0][1];
2779
2780                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2781                         for(; qpi<4; qpi++){
2782                             int dquant= dquant_tab[qpi];
2783                             qp= last_qp + dquant;
2784                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2785                                 continue;
2786                             backup_s.dquant= dquant;
2787                             if(s->mb_intra && s->dc_val[0]){
2788                                 for(i=0; i<6; i++){
2789                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2790                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2791                                 }
2792                             }
2793
2794                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2795                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2796                             if(best_s.qscale != qp){
2797                                 if(s->mb_intra && s->dc_val[0]){
2798                                     for(i=0; i<6; i++){
2799                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2800                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2801                                     }
2802                                 }
2803                             }
2804                         }
2805                     }
2806                 }
2807                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2808                     int mx= s->b_direct_mv_table[xy][0];
2809                     int my= s->b_direct_mv_table[xy][1];
2810
2811                     backup_s.dquant = 0;
2812                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2813                     s->mb_intra= 0;
2814                     ff_mpeg4_set_direct_mv(s, mx, my);
2815                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2816                                  &dmin, &next_block, mx, my);
2817                 }
2818                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2819                     backup_s.dquant = 0;
2820                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2821                     s->mb_intra= 0;
2822                     ff_mpeg4_set_direct_mv(s, 0, 0);
2823                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2824                                  &dmin, &next_block, 0, 0);
2825                 }
2826                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2827                     int coded=0;
2828                     for(i=0; i<6; i++)
2829                         coded |= s->block_last_index[i];
2830                     if(coded){
2831                         int mx,my;
2832                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2833                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2834                             mx=my=0; //FIXME find the one we actually used
2835                             ff_mpeg4_set_direct_mv(s, mx, my);
2836                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2837                             mx= s->mv[1][0][0];
2838                             my= s->mv[1][0][1];
2839                         }else{
2840                             mx= s->mv[0][0][0];
2841                             my= s->mv[0][0][1];
2842                         }
2843
2844                         s->mv_dir= best_s.mv_dir;
2845                         s->mv_type = best_s.mv_type;
2846                         s->mb_intra= 0;
2847 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2848                         s->mv[0][0][1] = best_s.mv[0][0][1];
2849                         s->mv[1][0][0] = best_s.mv[1][0][0];
2850                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2851                         backup_s.dquant= 0;
2852                         s->skipdct=1;
2853                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2854                                         &dmin, &next_block, mx, my);
2855                         s->skipdct=0;
2856                     }
2857                 }
2858
2859                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2860
2861                 copy_context_after_encode(s, &best_s, -1);
2862
2863                 pb_bits_count= put_bits_count(&s->pb);
2864                 flush_put_bits(&s->pb);
2865                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2866                 s->pb= backup_s.pb;
2867
2868                 if(s->data_partitioning){
2869                     pb2_bits_count= put_bits_count(&s->pb2);
2870                     flush_put_bits(&s->pb2);
2871                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2872                     s->pb2= backup_s.pb2;
2873
2874                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2875                     flush_put_bits(&s->tex_pb);
2876                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2877                     s->tex_pb= backup_s.tex_pb;
2878                 }
2879                 s->last_bits= put_bits_count(&s->pb);
2880
2881                 if (CONFIG_H263_ENCODER &&
2882                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2883                     ff_h263_update_motion_val(s);
2884
2885                 if(next_block==0){ //FIXME 16 vs linesize16
2886                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2887                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2888                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2889                 }
2890
2891                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2892                     ff_MPV_decode_mb(s, s->block);
2893             } else {
2894                 int motion_x = 0, motion_y = 0;
2895                 s->mv_type=MV_TYPE_16X16;
2896                 // only one MB-Type possible
2897
2898                 switch(mb_type){
2899                 case CANDIDATE_MB_TYPE_INTRA:
2900                     s->mv_dir = 0;
2901                     s->mb_intra= 1;
2902                     motion_x= s->mv[0][0][0] = 0;
2903                     motion_y= s->mv[0][0][1] = 0;
2904                     break;
2905                 case CANDIDATE_MB_TYPE_INTER:
2906                     s->mv_dir = MV_DIR_FORWARD;
2907                     s->mb_intra= 0;
2908                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2909                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2910                     break;
2911                 case CANDIDATE_MB_TYPE_INTER_I:
2912                     s->mv_dir = MV_DIR_FORWARD;
2913                     s->mv_type = MV_TYPE_FIELD;
2914                     s->mb_intra= 0;
2915                     for(i=0; i<2; i++){
2916                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2917                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2918                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2919                     }
2920                     break;
2921                 case CANDIDATE_MB_TYPE_INTER4V:
2922                     s->mv_dir = MV_DIR_FORWARD;
2923                     s->mv_type = MV_TYPE_8X8;
2924                     s->mb_intra= 0;
2925                     for(i=0; i<4; i++){
2926                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2927                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2928                     }
2929                     break;
2930                 case CANDIDATE_MB_TYPE_DIRECT:
2931                     if (CONFIG_MPEG4_ENCODER) {
2932                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2933                         s->mb_intra= 0;
2934                         motion_x=s->b_direct_mv_table[xy][0];
2935                         motion_y=s->b_direct_mv_table[xy][1];
2936                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2937                     }
2938                     break;
2939                 case CANDIDATE_MB_TYPE_DIRECT0:
2940                     if (CONFIG_MPEG4_ENCODER) {
2941                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2942                         s->mb_intra= 0;
2943                         ff_mpeg4_set_direct_mv(s, 0, 0);
2944                     }
2945                     break;
2946                 case CANDIDATE_MB_TYPE_BIDIR:
2947                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2948                     s->mb_intra= 0;
2949                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2950                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2951                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2952                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2953                     break;
2954                 case CANDIDATE_MB_TYPE_BACKWARD:
2955                     s->mv_dir = MV_DIR_BACKWARD;
2956                     s->mb_intra= 0;
2957                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2958                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2959                     break;
2960                 case CANDIDATE_MB_TYPE_FORWARD:
2961                     s->mv_dir = MV_DIR_FORWARD;
2962                     s->mb_intra= 0;
2963                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2964                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2965 //                    printf(" %d %d ", motion_x, motion_y);
2966                     break;
2967                 case CANDIDATE_MB_TYPE_FORWARD_I:
2968                     s->mv_dir = MV_DIR_FORWARD;
2969                     s->mv_type = MV_TYPE_FIELD;
2970                     s->mb_intra= 0;
2971                     for(i=0; i<2; i++){
2972                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2973                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2974                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2975                     }
2976                     break;
2977                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2978                     s->mv_dir = MV_DIR_BACKWARD;
2979                     s->mv_type = MV_TYPE_FIELD;
2980                     s->mb_intra= 0;
2981                     for(i=0; i<2; i++){
2982                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2983                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2984                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2985                     }
2986                     break;
2987                 case CANDIDATE_MB_TYPE_BIDIR_I:
2988                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2989                     s->mv_type = MV_TYPE_FIELD;
2990                     s->mb_intra= 0;
2991                     for(dir=0; dir<2; dir++){
2992                         for(i=0; i<2; i++){
2993                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2994                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2995                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2996                         }
2997                     }
2998                     break;
2999                 default:
3000                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3001                 }
3002
3003                 encode_mb(s, motion_x, motion_y);
3004
3005                 // RAL: Update last macroblock type
3006                 s->last_mv_dir = s->mv_dir;
3007
3008                 if (CONFIG_H263_ENCODER &&
3009                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3010                     ff_h263_update_motion_val(s);
3011
3012                 ff_MPV_decode_mb(s, s->block);
3013             }
3014
3015             /* clean the MV table in IPS frames for direct mode in B frames */
3016             if(s->mb_intra /* && I,P,S_TYPE */){
3017                 s->p_mv_table[xy][0]=0;
3018                 s->p_mv_table[xy][1]=0;
3019             }
3020
3021             if(s->flags&CODEC_FLAG_PSNR){
3022                 int w= 16;
3023                 int h= 16;
3024
3025                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3026                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3027
3028                 s->current_picture.f.error[0] += sse(
3029                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3030                     s->dest[0], w, h, s->linesize);
3031                 s->current_picture.f.error[1] += sse(
3032                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3033                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3034                 s->current_picture.f.error[2] += sse(
3035                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3036                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3037             }
3038             if(s->loop_filter){
3039                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3040                     ff_h263_loop_filter(s);
3041             }
3042 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
3043         }
3044     }
3045
3046     //not beautiful here but we must write it before flushing so it has to be here
3047     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3048         ff_msmpeg4_encode_ext_header(s);
3049
3050     write_slice_end(s);
3051
3052     /* Send the last GOB if RTP */
3053     if (s->avctx->rtp_callback) {
3054         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3055         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3056         /* Call the RTP callback to send the last GOB */
3057         emms_c();
3058         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3059     }
3060
3061     return 0;
3062 }
3063
/*
 * Add one statistic of a slice context to the main context and clear it in
 * the slice context.  Pointers named `dst` and `src` must be in scope where
 * the macro is used.  The body is wrapped in do { } while (0) so that both
 * assignments always act as a single statement, even under an unbraced
 * if/else/for.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3065 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3066     MERGE(me.scene_change_score);
3067     MERGE(me.mc_mb_var_sum_temp);
3068     MERGE(me.mb_var_sum_temp);
3069 }
3070
3071 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3072     int i;
3073
3074     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3075     MERGE(dct_count[1]);
3076     MERGE(mv_bits);
3077     MERGE(i_tex_bits);
3078     MERGE(p_tex_bits);
3079     MERGE(i_count);
3080     MERGE(f_count);
3081     MERGE(b_count);
3082     MERGE(skip_count);
3083     MERGE(misc_bits);
3084     MERGE(error_count);
3085     MERGE(padding_bug_score);
3086     MERGE(current_picture.f.error[0]);
3087     MERGE(current_picture.f.error[1]);
3088     MERGE(current_picture.f.error[2]);
3089
3090     if(dst->avctx->noise_reduction){
3091         for(i=0; i<64; i++){
3092             MERGE(dct_error_sum[0][i]);
3093             MERGE(dct_error_sum[1][i]);
3094         }
3095     }
3096
3097     assert(put_bits_count(&src->pb) % 8 ==0);
3098     assert(put_bits_count(&dst->pb) % 8 ==0);
3099     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3100     flush_put_bits(&dst->pb);
3101 }
3102
3103 static int estimate_qp(MpegEncContext *s, int dry_run){
3104     if (s->next_lambda){
3105         s->current_picture_ptr->f.quality =
3106         s->current_picture.f.quality = s->next_lambda;
3107         if(!dry_run) s->next_lambda= 0;
3108     } else if (!s->fixed_qscale) {
3109         s->current_picture_ptr->f.quality =
3110         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3111         if (s->current_picture.f.quality < 0)
3112             return -1;
3113     }
3114
3115     if(s->adaptive_quant){
3116         switch(s->codec_id){
3117         case CODEC_ID_MPEG4:
3118             if (CONFIG_MPEG4_ENCODER)
3119                 ff_clean_mpeg4_qscales(s);
3120             break;
3121         case CODEC_ID_H263:
3122         case CODEC_ID_H263P:
3123         case CODEC_ID_FLV1:
3124             if (CONFIG_H263_ENCODER)
3125                 ff_clean_h263_qscales(s);
3126             break;
3127         default:
3128             ff_init_qscale_tab(s);
3129         }
3130
3131         s->lambda= s->lambda_table[0];
3132         //FIXME broken
3133     }else
3134         s->lambda = s->current_picture.f.quality;
3135 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3136     update_qscale(s);
3137     return 0;
3138 }
3139
3140 /* must be called before writing the header */
3141 static void set_frame_distances(MpegEncContext * s){
3142     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3143     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3144
3145     if(s->pict_type==AV_PICTURE_TYPE_B){
3146         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3147         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3148     }else{
3149         s->pp_time= s->time - s->last_non_b_time;
3150         s->last_non_b_time= s->time;
3151         assert(s->picture_number==0 || s->pp_time > 0);
3152     }
3153 }
3154
3155 static int encode_picture(MpegEncContext *s, int picture_number)
3156 {
3157     int i;
3158     int bits;
3159     int context_count = s->slice_context_count;
3160
3161     s->picture_number = picture_number;
3162
3163     /* Reset the average MB variance */
3164     s->me.mb_var_sum_temp    =
3165     s->me.mc_mb_var_sum_temp = 0;
3166
3167     /* we need to initialize some time vars before we can encode b-frames */
3168     // RAL: Condition added for MPEG1VIDEO
3169     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3170         set_frame_distances(s);
3171     if(CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4)
3172         ff_set_mpeg4_time(s);
3173
3174     s->me.scene_change_score=0;
3175
3176 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3177
3178     if(s->pict_type==AV_PICTURE_TYPE_I){
3179         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3180         else                        s->no_rounding=0;
3181     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3182         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
3183             s->no_rounding ^= 1;
3184     }
3185
3186     if(s->flags & CODEC_FLAG_PASS2){
3187         if (estimate_qp(s,1) < 0)
3188             return -1;
3189         ff_get_2pass_fcode(s);
3190     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3191         if(s->pict_type==AV_PICTURE_TYPE_B)
3192             s->lambda= s->last_lambda_for[s->pict_type];
3193         else
3194             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3195         update_qscale(s);
3196     }
3197
3198     if(s->codec_id != CODEC_ID_AMV){
3199         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3200         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3201         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3202         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3203     }
3204
3205     s->mb_intra=0; //for the rate distortion & bit compare functions
3206     for(i=1; i<context_count; i++){
3207         ff_update_duplicate_context(s->thread_context[i], s);
3208     }
3209
3210     if(ff_init_me(s)<0)
3211         return -1;
3212
3213     /* Estimate motion for every MB */
3214     if(s->pict_type != AV_PICTURE_TYPE_I){
3215         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3216         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3217         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3218             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3219                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3220             }
3221         }
3222
3223         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3224     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3225         /* I-Frame */
3226         for(i=0; i<s->mb_stride*s->mb_height; i++)
3227             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3228
3229         if(!s->fixed_qscale){
3230             /* finding spatial complexity for I-frame rate control */
3231             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3232         }
3233     }
3234     for(i=1; i<context_count; i++){
3235         merge_context_after_me(s, s->thread_context[i]);
3236     }
3237     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3238     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3239     emms_c();
3240
3241     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3242         s->pict_type= AV_PICTURE_TYPE_I;
3243         for(i=0; i<s->mb_stride*s->mb_height; i++)
3244             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3245 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3246     }
3247
3248     if(!s->umvplus){
3249         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3250             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3251
3252             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3253                 int a,b;
3254                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3255                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3256                 s->f_code= FFMAX3(s->f_code, a, b);
3257             }
3258
3259             ff_fix_long_p_mvs(s);
3260             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3261             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3262                 int j;
3263                 for(i=0; i<2; i++){
3264                     for(j=0; j<2; j++)
3265                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3266                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3267                 }
3268             }
3269         }
3270
3271         if(s->pict_type==AV_PICTURE_TYPE_B){
3272             int a, b;
3273
3274             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3275             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3276             s->f_code = FFMAX(a, b);
3277
3278             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3279             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3280             s->b_code = FFMAX(a, b);
3281
3282             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3283             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3284             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3285             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3286             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3287                 int dir, j;
3288                 for(dir=0; dir<2; dir++){
3289                     for(i=0; i<2; i++){
3290                         for(j=0; j<2; j++){
3291                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3292                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3293                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3294                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3295                         }
3296                     }
3297                 }
3298             }
3299         }
3300     }
3301
3302     if (estimate_qp(s, 0) < 0)
3303         return -1;
3304
3305     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3306         s->qscale= 3; //reduce clipping problems
3307
3308     if (s->out_format == FMT_MJPEG) {
3309         /* for mjpeg, we do include qscale in the matrix */
3310         for(i=1;i<64;i++){
3311             int j= s->dsp.idct_permutation[i];
3312
3313             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3314         }
3315         s->y_dc_scale_table=
3316         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3317         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3318         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3319                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3320         s->qscale= 8;
3321     }
3322     if(s->codec_id == CODEC_ID_AMV){
3323         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3324         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3325         for(i=1;i<64;i++){
3326             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3327
3328             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3329             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3330         }
3331         s->y_dc_scale_table= y;
3332         s->c_dc_scale_table= c;
3333         s->intra_matrix[0] = 13;
3334         s->chroma_intra_matrix[0] = 14;
3335         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3336                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3337         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3338                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3339         s->qscale= 8;
3340     }
3341
3342     //FIXME var duplication
3343     s->current_picture_ptr->f.key_frame =
3344     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3345     s->current_picture_ptr->f.pict_type =
3346     s->current_picture.f.pict_type = s->pict_type;
3347
3348     if (s->current_picture.f.key_frame)
3349         s->picture_in_gop_number=0;
3350
3351     s->last_bits= put_bits_count(&s->pb);
3352     switch(s->out_format) {
3353     case FMT_MJPEG:
3354         if (CONFIG_MJPEG_ENCODER)
3355             ff_mjpeg_encode_picture_header(s);
3356         break;
3357     case FMT_H261:
3358         if (CONFIG_H261_ENCODER)
3359             ff_h261_encode_picture_header(s, picture_number);
3360         break;
3361     case FMT_H263:
3362         if (CONFIG_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
3363             ff_wmv2_encode_picture_header(s, picture_number);
3364         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3365             ff_msmpeg4_encode_picture_header(s, picture_number);
3366         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3367             ff_mpeg4_encode_picture_header(s, picture_number);
3368         else if (CONFIG_RV10_ENCODER && s->codec_id == CODEC_ID_RV10)
3369             ff_rv10_encode_picture_header(s, picture_number);
3370         else if (CONFIG_RV20_ENCODER && s->codec_id == CODEC_ID_RV20)
3371             ff_rv20_encode_picture_header(s, picture_number);
3372         else if (CONFIG_FLV_ENCODER && s->codec_id == CODEC_ID_FLV1)
3373             ff_flv_encode_picture_header(s, picture_number);
3374         else if (CONFIG_H263_ENCODER)
3375             ff_h263_encode_picture_header(s, picture_number);
3376         break;
3377     case FMT_MPEG1:
3378         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3379             ff_mpeg1_encode_picture_header(s, picture_number);
3380         break;
3381     case FMT_H264:
3382         break;
3383     default:
3384         assert(0);
3385     }
3386     bits= put_bits_count(&s->pb);
3387     s->header_bits= bits - s->last_bits;
3388
3389     for(i=1; i<context_count; i++){
3390         update_duplicate_context_after_me(s->thread_context[i], s);
3391     }
3392     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3393     for(i=1; i<context_count; i++){
3394         merge_context_after_encode(s, s->thread_context[i]);
3395     }
3396     emms_c();
3397     return 0;
3398 }
3399
3400 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3401     const int intra= s->mb_intra;
3402     int i;
3403
3404     s->dct_count[intra]++;
3405
3406     for(i=0; i<64; i++){
3407         int level= block[i];
3408
3409         if(level){
3410             if(level>0){
3411                 s->dct_error_sum[intra][i] += level;
3412                 level -= s->dct_offset[intra][i];
3413                 if(level<0) level=0;
3414             }else{
3415                 s->dct_error_sum[intra][i] -= level;
3416                 level += s->dct_offset[intra][i];
3417                 if(level>0) level=0;
3418             }
3419             block[i]= level;
3420         }
3421     }
3422 }
3423
3424 static int dct_quantize_trellis_c(MpegEncContext *s,
3425                                   DCTELEM *block, int n,
3426                                   int qscale, int *overflow){
3427     const int *qmat;
3428     const uint8_t *scantable= s->intra_scantable.scantable;
3429     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3430     int max=0;
3431     unsigned int threshold1, threshold2;
3432     int bias=0;
3433     int run_tab[65];
3434     int level_tab[65];
3435     int score_tab[65];
3436     int survivor[65];
3437     int survivor_count;
3438     int last_run=0;
3439     int last_level=0;
3440     int last_score= 0;
3441     int last_i;
3442     int coeff[2][64];
3443     int coeff_count[64];
3444     int qmul, qadd, start_i, last_non_zero, i, dc;
3445     const int esc_length= s->ac_esc_length;
3446     uint8_t * length;
3447     uint8_t * last_length;
3448     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3449
3450     s->dsp.fdct (block);
3451
3452     if(s->dct_error_sum)
3453         s->denoise_dct(s, block);
3454     qmul= qscale*16;
3455     qadd= ((qscale-1)|1)*8;
3456
3457     if (s->mb_intra) {
3458         int q;
3459         if (!s->h263_aic) {
3460             if (n < 4)
3461                 q = s->y_dc_scale;
3462             else
3463                 q = s->c_dc_scale;
3464             q = q << 3;
3465         } else{
3466             /* For AIC we skip quant/dequant of INTRADC */
3467             q = 1 << 3;
3468             qadd=0;
3469         }
3470
3471         /* note: block[0] is assumed to be positive */
3472         block[0] = (block[0] + (q >> 1)) / q;
3473         start_i = 1;
3474         last_non_zero = 0;
3475         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3476         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3477             bias= 1<<(QMAT_SHIFT-1);
3478         length     = s->intra_ac_vlc_length;
3479         last_length= s->intra_ac_vlc_last_length;
3480     } else {
3481         start_i = 0;
3482         last_non_zero = -1;
3483         qmat = s->q_inter_matrix[qscale];
3484         length     = s->inter_ac_vlc_length;
3485         last_length= s->inter_ac_vlc_last_length;
3486     }
3487     last_i= start_i;
3488
3489     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3490     threshold2= (threshold1<<1);
3491
3492     for(i=63; i>=start_i; i--) {
3493         const int j = scantable[i];
3494         int level = block[j] * qmat[j];
3495
3496         if(((unsigned)(level+threshold1))>threshold2){
3497             last_non_zero = i;
3498             break;
3499         }
3500     }
3501
3502     for(i=start_i; i<=last_non_zero; i++) {
3503         const int j = scantable[i];
3504         int level = block[j] * qmat[j];
3505
3506 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3507 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3508         if(((unsigned)(level+threshold1))>threshold2){
3509             if(level>0){
3510                 level= (bias + level)>>QMAT_SHIFT;
3511                 coeff[0][i]= level;
3512                 coeff[1][i]= level-1;
3513 //                coeff[2][k]= level-2;
3514             }else{
3515                 level= (bias - level)>>QMAT_SHIFT;
3516                 coeff[0][i]= -level;
3517                 coeff[1][i]= -level+1;
3518 //                coeff[2][k]= -level+2;
3519             }
3520             coeff_count[i]= FFMIN(level, 2);
3521             assert(coeff_count[i]);
3522             max |=level;
3523         }else{
3524             coeff[0][i]= (level>>31)|1;
3525             coeff_count[i]= 1;
3526         }
3527     }
3528
3529     *overflow= s->max_qcoeff < max; //overflow might have happened
3530
3531     if(last_non_zero < start_i){
3532         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3533         return last_non_zero;
3534     }
3535
3536     score_tab[start_i]= 0;
3537     survivor[0]= start_i;
3538     survivor_count= 1;
3539
3540     for(i=start_i; i<=last_non_zero; i++){
3541         int level_index, j, zero_distortion;
3542         int dct_coeff= FFABS(block[ scantable[i] ]);
3543         int best_score=256*256*256*120;
3544
3545         if (s->dsp.fdct == ff_fdct_ifast)
3546             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3547         zero_distortion= dct_coeff*dct_coeff;
3548
3549         for(level_index=0; level_index < coeff_count[i]; level_index++){
3550             int distortion;
3551             int level= coeff[level_index][i];
3552             const int alevel= FFABS(level);
3553             int unquant_coeff;
3554
3555             assert(level);
3556
3557             if(s->out_format == FMT_H263){
3558                 unquant_coeff= alevel*qmul + qadd;
3559             }else{ //MPEG1
3560                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3561                 if(s->mb_intra){
3562                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3563                         unquant_coeff =   (unquant_coeff - 1) | 1;
3564                 }else{
3565                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3566                         unquant_coeff =   (unquant_coeff - 1) | 1;
3567                 }
3568                 unquant_coeff<<= 3;
3569             }
3570
3571             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3572             level+=64;
3573             if((level&(~127)) == 0){
3574                 for(j=survivor_count-1; j>=0; j--){
3575                     int run= i - survivor[j];
3576                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3577                     score += score_tab[i-run];
3578
3579                     if(score < best_score){
3580                         best_score= score;
3581                         run_tab[i+1]= run;
3582                         level_tab[i+1]= level-64;
3583                     }
3584                 }
3585
3586                 if(s->out_format == FMT_H263){
3587                     for(j=survivor_count-1; j>=0; j--){
3588                         int run= i - survivor[j];
3589                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3590                         score += score_tab[i-run];
3591                         if(score < last_score){
3592                             last_score= score;
3593                             last_run= run;
3594                             last_level= level-64;
3595                             last_i= i+1;
3596                         }
3597                     }
3598                 }
3599             }else{
3600                 distortion += esc_length*lambda;
3601                 for(j=survivor_count-1; j>=0; j--){
3602                     int run= i - survivor[j];
3603                     int score= distortion + score_tab[i-run];
3604
3605                     if(score < best_score){
3606                         best_score= score;
3607                         run_tab[i+1]= run;
3608                         level_tab[i+1]= level-64;
3609                     }
3610                 }
3611
3612                 if(s->out_format == FMT_H263){
3613                   for(j=survivor_count-1; j>=0; j--){
3614                         int run= i - survivor[j];
3615                         int score= distortion + score_tab[i-run];
3616                         if(score < last_score){
3617                             last_score= score;
3618                             last_run= run;
3619                             last_level= level-64;
3620                             last_i= i+1;
3621                         }
3622                     }
3623                 }
3624             }
3625         }
3626
3627         score_tab[i+1]= best_score;
3628
3629         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3630         if(last_non_zero <= 27){
3631             for(; survivor_count; survivor_count--){
3632                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3633                     break;
3634             }
3635         }else{
3636             for(; survivor_count; survivor_count--){
3637                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3638                     break;
3639             }
3640         }
3641
3642         survivor[ survivor_count++ ]= i+1;
3643     }
3644
3645     if(s->out_format != FMT_H263){
3646         last_score= 256*256*256*120;
3647         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3648             int score= score_tab[i];
3649             if(i) score += lambda*2; //FIXME exacter?
3650
3651             if(score < last_score){
3652                 last_score= score;
3653                 last_i= i;
3654                 last_level= level_tab[i];
3655                 last_run= run_tab[i];
3656             }
3657         }
3658     }
3659
3660     s->coded_score[n] = last_score;
3661
3662     dc= FFABS(block[0]);
3663     last_non_zero= last_i - 1;
3664     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3665
3666     if(last_non_zero < start_i)
3667         return last_non_zero;
3668
3669     if(last_non_zero == 0 && start_i == 0){
3670         int best_level= 0;
3671         int best_score= dc * dc;
3672
3673         for(i=0; i<coeff_count[0]; i++){
3674             int level= coeff[i][0];
3675             int alevel= FFABS(level);
3676             int unquant_coeff, score, distortion;
3677
3678             if(s->out_format == FMT_H263){
3679                     unquant_coeff= (alevel*qmul + qadd)>>3;
3680             }else{ //MPEG1
3681                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3682                     unquant_coeff =   (unquant_coeff - 1) | 1;
3683             }
3684             unquant_coeff = (unquant_coeff + 4) >> 3;
3685             unquant_coeff<<= 3 + 3;
3686
3687             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3688             level+=64;
3689             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3690             else                    score= distortion + esc_length*lambda;
3691
3692             if(score < best_score){
3693                 best_score= score;
3694                 best_level= level - 64;
3695             }
3696         }
3697         block[0]= best_level;
3698         s->coded_score[n] = best_score - dc*dc;
3699         if(best_level == 0) return -1;
3700         else                return last_non_zero;
3701     }
3702
3703     i= last_i;
3704     assert(last_level);
3705
3706     block[ perm_scantable[last_non_zero] ]= last_level;
3707     i -= last_run + 1;
3708
3709     for(; i>start_i; i -= run_tab[i] + 1){
3710         block[ perm_scantable[i-1] ]= level_tab[i];
3711     }
3712
3713     return last_non_zero;
3714 }
3715
3716 //#define REFINE_STATS 1
3717 static int16_t basis[64][64];
3718
3719 static void build_basis(uint8_t *perm){
3720     int i, j, x, y;
3721     emms_c();
3722     for(i=0; i<8; i++){
3723         for(j=0; j<8; j++){
3724             for(y=0; y<8; y++){
3725                 for(x=0; x<8; x++){
3726                     double s= 0.25*(1<<BASIS_SHIFT);
3727                     int index= 8*i + j;
3728                     int perm_index= perm[index];
3729                     if(i==0) s*= sqrt(0.5);
3730                     if(j==0) s*= sqrt(0.5);
3731                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3732                 }
3733             }
3734         }
3735     }
3736 }
3737
3738 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3739                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3740                         int n, int qscale){
3741     int16_t rem[64];
3742     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3743     const uint8_t *scantable= s->intra_scantable.scantable;
3744     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3745 //    unsigned int threshold1, threshold2;
3746 //    int bias=0;
3747     int run_tab[65];
3748     int prev_run=0;
3749     int prev_level=0;
3750     int qmul, qadd, start_i, last_non_zero, i, dc;
3751     uint8_t * length;
3752     uint8_t * last_length;
3753     int lambda;
3754     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3755 #ifdef REFINE_STATS
3756 static int count=0;
3757 static int after_last=0;
3758 static int to_zero=0;
3759 static int from_zero=0;
3760 static int raise=0;
3761 static int lower=0;
3762 static int messed_sign=0;
3763 #endif
3764
3765     if(basis[0][0] == 0)
3766         build_basis(s->dsp.idct_permutation);
3767
3768     qmul= qscale*2;
3769     qadd= (qscale-1)|1;
3770     if (s->mb_intra) {
3771         if (!s->h263_aic) {
3772             if (n < 4)
3773                 q = s->y_dc_scale;
3774             else
3775                 q = s->c_dc_scale;
3776         } else{
3777             /* For AIC we skip quant/dequant of INTRADC */
3778             q = 1;
3779             qadd=0;
3780         }
3781         q <<= RECON_SHIFT-3;
3782         /* note: block[0] is assumed to be positive */
3783         dc= block[0]*q;
3784 //        block[0] = (block[0] + (q >> 1)) / q;
3785         start_i = 1;
3786 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3787 //            bias= 1<<(QMAT_SHIFT-1);
3788         length     = s->intra_ac_vlc_length;
3789         last_length= s->intra_ac_vlc_last_length;
3790     } else {
3791         dc= 0;
3792         start_i = 0;
3793         length     = s->inter_ac_vlc_length;
3794         last_length= s->inter_ac_vlc_last_length;
3795     }
3796     last_non_zero = s->block_last_index[n];
3797
3798 #ifdef REFINE_STATS
3799 {START_TIMER
3800 #endif
3801     dc += (1<<(RECON_SHIFT-1));
3802     for(i=0; i<64; i++){
3803         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3804     }
3805 #ifdef REFINE_STATS
3806 STOP_TIMER("memset rem[]")}
3807 #endif
3808     sum=0;
3809     for(i=0; i<64; i++){
3810         int one= 36;
3811         int qns=4;
3812         int w;
3813
3814         w= FFABS(weight[i]) + qns*one;
3815         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3816
3817         weight[i] = w;
3818 //        w=weight[i] = (63*qns + (w/2)) / w;
3819
3820         assert(w>0);
3821         assert(w<(1<<6));
3822         sum += w*w;
3823     }
3824     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3825 #ifdef REFINE_STATS
3826 {START_TIMER
3827 #endif
3828     run=0;
3829     rle_index=0;
3830     for(i=start_i; i<=last_non_zero; i++){
3831         int j= perm_scantable[i];
3832         const int level= block[j];
3833         int coeff;
3834
3835         if(level){
3836             if(level<0) coeff= qmul*level - qadd;
3837             else        coeff= qmul*level + qadd;
3838             run_tab[rle_index++]=run;
3839             run=0;
3840
3841             s->dsp.add_8x8basis(rem, basis[j], coeff);
3842         }else{
3843             run++;
3844         }
3845     }
3846 #ifdef REFINE_STATS
3847 if(last_non_zero>0){
3848 STOP_TIMER("init rem[]")
3849 }
3850 }
3851
3852 {START_TIMER
3853 #endif
3854     for(;;){
3855         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3856         int best_coeff=0;
3857         int best_change=0;
3858         int run2, best_unquant_change=0, analyze_gradient;
3859 #ifdef REFINE_STATS
3860 {START_TIMER
3861 #endif
3862         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3863
3864         if(analyze_gradient){
3865 #ifdef REFINE_STATS
3866 {START_TIMER
3867 #endif
3868             for(i=0; i<64; i++){
3869                 int w= weight[i];
3870
3871                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3872             }
3873 #ifdef REFINE_STATS
3874 STOP_TIMER("rem*w*w")}
3875 {START_TIMER
3876 #endif
3877             s->dsp.fdct(d1);
3878 #ifdef REFINE_STATS
3879 STOP_TIMER("dct")}
3880 #endif
3881         }
3882
3883         if(start_i){
3884             const int level= block[0];
3885             int change, old_coeff;
3886
3887             assert(s->mb_intra);
3888
3889             old_coeff= q*level;
3890
3891             for(change=-1; change<=1; change+=2){
3892                 int new_level= level + change;
3893                 int score, new_coeff;
3894
3895                 new_coeff= q*new_level;
3896                 if(new_coeff >= 2048 || new_coeff < 0)
3897                     continue;
3898
3899                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3900                 if(score<best_score){
3901                     best_score= score;
3902                     best_coeff= 0;
3903                     best_change= change;
3904                     best_unquant_change= new_coeff - old_coeff;
3905                 }
3906             }
3907         }
3908
3909         run=0;
3910         rle_index=0;
3911         run2= run_tab[rle_index++];
3912         prev_level=0;
3913         prev_run=0;
3914
3915         for(i=start_i; i<64; i++){
3916             int j= perm_scantable[i];
3917             const int level= block[j];
3918             int change, old_coeff;
3919
3920             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3921                 break;
3922
3923             if(level){
3924                 if(level<0) old_coeff= qmul*level - qadd;
3925                 else        old_coeff= qmul*level + qadd;
3926                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3927             }else{
3928                 old_coeff=0;
3929                 run2--;
3930                 assert(run2>=0 || i >= last_non_zero );
3931             }
3932
3933             for(change=-1; change<=1; change+=2){
3934                 int new_level= level + change;
3935                 int score, new_coeff, unquant_change;
3936
3937                 score=0;
3938                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3939                    continue;
3940
3941                 if(new_level){
3942                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3943                     else            new_coeff= qmul*new_level + qadd;
3944                     if(new_coeff >= 2048 || new_coeff <= -2048)
3945                         continue;
3946                     //FIXME check for overflow
3947
3948                     if(level){
3949                         if(level < 63 && level > -63){
3950                             if(i < last_non_zero)
3951                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3952                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3953                             else
3954                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3955                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3956                         }
3957                     }else{
3958                         assert(FFABS(new_level)==1);
3959
3960                         if(analyze_gradient){
3961                             int g= d1[ scantable[i] ];
3962                             if(g && (g^new_level) >= 0)
3963                                 continue;
3964                         }
3965
3966                         if(i < last_non_zero){
3967                             int next_i= i + run2 + 1;
3968                             int next_level= block[ perm_scantable[next_i] ] + 64;
3969
3970                             if(next_level&(~127))
3971                                 next_level= 0;
3972
3973                             if(next_i < last_non_zero)
3974                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3975                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3976                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3977                             else
3978                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3979                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3980                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3981                         }else{
3982                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3983                             if(prev_level){
3984                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3985                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3986                             }
3987                         }
3988                     }
3989                 }else{
3990                     new_coeff=0;
3991                     assert(FFABS(level)==1);
3992
3993                     if(i < last_non_zero){
3994                         int next_i= i + run2 + 1;
3995                         int next_level= block[ perm_scantable[next_i] ] + 64;
3996
3997                         if(next_level&(~127))
3998                             next_level= 0;
3999
4000                         if(next_i < last_non_zero)
4001                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4002                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4003                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4004                         else
4005                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4006                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4007                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4008                     }else{
4009                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4010                         if(prev_level){
4011                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4012                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4013                         }
4014                     }
4015                 }
4016
4017                 score *= lambda;
4018
4019                 unquant_change= new_coeff - old_coeff;
4020                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4021
4022                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4023                 if(score<best_score){
4024                     best_score= score;
4025                     best_coeff= i;
4026                     best_change= change;
4027                     best_unquant_change= unquant_change;
4028                 }
4029             }
4030             if(level){
4031                 prev_level= level + 64;
4032                 if(prev_level&(~127))
4033                     prev_level= 0;
4034                 prev_run= run;
4035                 run=0;
4036             }else{
4037                 run++;
4038             }
4039         }
4040 #ifdef REFINE_STATS
4041 STOP_TIMER("iterative step")}
4042 #endif
4043
4044         if(best_change){
4045             int j= perm_scantable[ best_coeff ];
4046
4047             block[j] += best_change;
4048
4049             if(best_coeff > last_non_zero){
4050                 last_non_zero= best_coeff;
4051                 assert(block[j]);
4052 #ifdef REFINE_STATS
4053 after_last++;
4054 #endif
4055             }else{
4056 #ifdef REFINE_STATS
4057 if(block[j]){
4058     if(block[j] - best_change){
4059         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4060             raise++;
4061         }else{
4062             lower++;
4063         }
4064     }else{
4065         from_zero++;
4066     }
4067 }else{
4068     to_zero++;
4069 }
4070 #endif
4071                 for(; last_non_zero>=start_i; last_non_zero--){
4072                     if(block[perm_scantable[last_non_zero]])
4073                         break;
4074                 }
4075             }
4076 #ifdef REFINE_STATS
4077 count++;
4078 if(256*256*256*64 % count == 0){
4079     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4080 }
4081 #endif
4082             run=0;
4083             rle_index=0;
4084             for(i=start_i; i<=last_non_zero; i++){
4085                 int j= perm_scantable[i];
4086                 const int level= block[j];
4087
4088                  if(level){
4089                      run_tab[rle_index++]=run;
4090                      run=0;
4091                  }else{
4092                      run++;
4093                  }
4094             }
4095
4096             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4097         }else{
4098             break;
4099         }
4100     }
4101 #ifdef REFINE_STATS
4102 if(last_non_zero>0){
4103 STOP_TIMER("iterative search")
4104 }
4105 }
4106 #endif
4107
4108     return last_non_zero;
4109 }
4110
/**
 * Forward-transform and quantize one 8x8 block (plain C implementation).
 *
 * @param s        encoder context; reads dsp.fdct, dct_error_sum/denoise_dct,
 *                 mb_intra, h263_aic, the DC scales, the q_*_matrix tables,
 *                 the quantizer biases, max_qcoeff and the IDCT permutation
 * @param block    input to s->dsp.fdct(); on return holds the quantized levels
 * @param n        block index; n < 4 selects y_dc_scale and q_intra_matrix,
 *                 otherwise c_dc_scale and q_chroma_intra_matrix
 * @param qscale   index into the selected quantization matrix table
 * @param overflow set to non-zero when the OR of all quantized magnitudes
 *                 exceeds s->max_qcoeff (i.e. an overflow might have happened)
 * @return scan-order index of the last non-zero coefficient; 0 for an intra
 *         block with no AC levels, -1 for an inter block that is all zero
 */
4111 int ff_dct_quantize_c(MpegEncContext *s,
4112                         DCTELEM *block, int n,
4113                         int qscale, int *overflow)
4114 {
4115     int i, j, level, last_non_zero, q, start_i;
4116     const int *qmat;
4117     const uint8_t *scantable= s->intra_scantable.scantable;
4118     int bias;
4119     int max=0;
4120     unsigned int threshold1, threshold2;
4121
4122     s->dsp.fdct (block);
4123
         // optional denoising step, active only when dct_error_sum is set
4124     if(s->dct_error_sum)
4125         s->denoise_dct(s, block);
4126
4127     if (s->mb_intra) {
4128         if (!s->h263_aic) {
4129             if (n < 4)
4130                 q = s->y_dc_scale;
4131             else
4132                 q = s->c_dc_scale;
4133             q = q << 3;
4134         } else
4135             /* For AIC we skip quant/dequant of INTRADC */
4136             q = 1 << 3;
4137
4138         /* note: block[0] is assumed to be positive */
             // DC: rounded division by the DC scale
4139         block[0] = (block[0] + (q >> 1)) / q;
4140         start_i = 1;
4141         last_non_zero = 0;
4142         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
             // multiply instead of shifting: the bias is a signed value and
             // left-shifting a negative value is undefined behaviour
4143         bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4144     } else {
4145         start_i = 0;
4146         last_non_zero = -1;
4147         qmat = s->q_inter_matrix[qscale];
             // same reasoning as for the intra bias above
4148         bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4149     }
         // (unsigned)(level+threshold1) > threshold2 is a single-compare form of
         // "level > threshold1 || level < -threshold1", i.e. the level survives quantization
4150     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4151     threshold2= (threshold1<<1);
         // backward scan: zero the trailing coefficients that quantize to 0 and find the last survivor
4152     for(i=63;i>=start_i;i--) {
4153         j = scantable[i];
4154         level = block[j] * qmat[j];
4155
4156         if(((unsigned)(level+threshold1))>threshold2){
4157             last_non_zero = i;
4158             break;
4159         }else{
4160             block[j]=0;
4161         }
4162     }
         // forward scan: quantize everything up to the last surviving coefficient
4163     for(i=start_i; i<=last_non_zero; i++) {
4164         j = scantable[i];
4165         level = block[j] * qmat[j];
4166
4167 //        if(   bias+level >= (1<<QMAT_SHIFT)
4168 //           || bias-level >= (1<<QMAT_SHIFT)){
4169         if(((unsigned)(level+threshold1))>threshold2){
4170             if(level>0){
4171                 level= (bias + level)>>QMAT_SHIFT;
4172                 block[j]= level;
4173             }else{
4174                 level= (bias - level)>>QMAT_SHIFT;
4175                 block[j]= -level;
4176             }
                 // OR of magnitudes: an upper bound that is cheaper than tracking the true maximum
4177             max |=level;
4178         }else{
4179             block[j]=0;
4180         }
4181     }
4182     *overflow= s->max_qcoeff < max; //overflow might have happened
4183
4184     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4185     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4186         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4187
4188     return last_non_zero;
4189 }
4190
/* Byte offset of field x inside MpegEncContext; used as the target of the AVOption entries below. */
4191 #define OFFSET(x) offsetof(MpegEncContext, x)
/* Flag set shared by the AVOption entries below; parenthesized so the expansion
 * stays a single operand no matter which operator ends up next to it. */
4192 #define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
/* Private options of the H.263 encoder. Each entry maps an option name onto an
 * integer field of MpegEncContext (through OFFSET) with default 0 and the
 * given min/max range; the list is terminated by the { NULL } entry. */
4193 static const AVOption h263_options[] = {
4194     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4195     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4196     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { 0 }, 0, INT_MAX, VE },
         /* macro defined outside this chunk; presumably the options common to all mpegvideo encoders */
4197     FF_MPV_COMMON_OPTS
4198     { NULL },
4199 };
4200
/* AVClass that attaches h263_options to the encoder's private context. */
4201 static const AVClass h263_class = {
4202     .class_name = "H.263 encoder",
4203     .item_name  = av_default_item_name,
4204     .option     = h263_options,
4205     .version    = LIBAVUTIL_VERSION_INT,
4206 };
4207
/* Codec registration entry for the "h263" encoder; it uses the generic
 * ff_MPV_encode_* init/encode/close callbacks with MpegEncContext as private
 * data and lists PIX_FMT_YUV420P as its only pixel format. */
4208 AVCodec ff_h263_encoder = {
4209     .name           = "h263",
4210     .type           = AVMEDIA_TYPE_VIDEO,
4211     .id             = CODEC_ID_H263,
4212     .priv_data_size = sizeof(MpegEncContext),
4213     .init           = ff_MPV_encode_init,
4214     .encode2        = ff_MPV_encode_picture,
4215     .close          = ff_MPV_encode_end,
         /* PIX_FMT_NONE terminates the list */
4216     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4217     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4218     .priv_class     = &h263_class,
4219 };
4220
/* Private options of the H.263+ encoder. Each entry is a 0/1 integer switch
 * stored in the named MpegEncContext field (through OFFSET), default 0; the
 * list is terminated by the { NULL } entry. */
4221 static const AVOption h263p_options[] = {
4222     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4223     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4224     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4225     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
         /* macro defined outside this chunk; presumably the options common to all mpegvideo encoders */
4226     FF_MPV_COMMON_OPTS
4227     { NULL },
4228 };
/* AVClass that attaches h263p_options to the encoder's private context. */
4229 static const AVClass h263p_class = {
4230     .class_name = "H.263p encoder",
4231     .item_name  = av_default_item_name,
4232     .option     = h263p_options,
4233     .version    = LIBAVUTIL_VERSION_INT,
4234 };
4235
/* Codec registration entry for the "h263p" encoder; same generic
 * ff_MPV_encode_* callbacks as the other encoders in this file, and the only
 * one in this chunk that sets .capabilities (CODEC_CAP_SLICE_THREADS). */
4236 AVCodec ff_h263p_encoder = {
4237     .name           = "h263p",
4238     .type           = AVMEDIA_TYPE_VIDEO,
4239     .id             = CODEC_ID_H263P,
4240     .priv_data_size = sizeof(MpegEncContext),
4241     .init           = ff_MPV_encode_init,
4242     .encode2        = ff_MPV_encode_picture,
4243     .close          = ff_MPV_encode_end,
4244     .capabilities   = CODEC_CAP_SLICE_THREADS,
         /* PIX_FMT_NONE terminates the list */
4245     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4246     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4247     .priv_class     = &h263p_class,
4248 };
4249
/* Macro defined outside this chunk; presumably it declares the msmpeg4v2_class
 * object referenced by .priv_class below -- confirm against its definition. */
4250 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4251
/* Codec registration entry for the "msmpeg4v2" encoder; generic
 * ff_MPV_encode_* callbacks, MpegEncContext private data, YUV420P only. */
4252 AVCodec ff_msmpeg4v2_encoder = {
4253     .name           = "msmpeg4v2",
4254     .type           = AVMEDIA_TYPE_VIDEO,
4255     .id             = CODEC_ID_MSMPEG4V2,
4256     .priv_data_size = sizeof(MpegEncContext),
4257     .init           = ff_MPV_encode_init,
4258     .encode2        = ff_MPV_encode_picture,
4259     .close          = ff_MPV_encode_end,
         /* PIX_FMT_NONE terminates the list */
4260     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4261     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4262     .priv_class     = &msmpeg4v2_class,
4263 };
4264
/* Macro defined outside this chunk; presumably it declares the msmpeg4v3_class
 * object referenced by .priv_class below -- confirm against its definition. */
4265 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4266
/* Codec registration entry for MSMPEG4V3. Note that the registered name is
 * "msmpeg4" (no version suffix) while the id is CODEC_ID_MSMPEG4V3. */
4267 AVCodec ff_msmpeg4v3_encoder = {
4268     .name           = "msmpeg4",
4269     .type           = AVMEDIA_TYPE_VIDEO,
4270     .id             = CODEC_ID_MSMPEG4V3,
4271     .priv_data_size = sizeof(MpegEncContext),
4272     .init           = ff_MPV_encode_init,
4273     .encode2        = ff_MPV_encode_picture,
4274     .close          = ff_MPV_encode_end,
         /* PIX_FMT_NONE terminates the list */
4275     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4276     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4277     .priv_class     = &msmpeg4v3_class,
4278 };
4279
/* Macro defined outside this chunk; presumably it declares the wmv1_class
 * object referenced by .priv_class below -- confirm against its definition. */
4280 FF_MPV_GENERIC_CLASS(wmv1)
4281
/* Codec registration entry for the "wmv1" encoder (long name "Windows Media
 * Video 7"); generic ff_MPV_encode_* callbacks, MpegEncContext private data,
 * YUV420P only. */
4282 AVCodec ff_wmv1_encoder = {
4283     .name           = "wmv1",
4284     .type           = AVMEDIA_TYPE_VIDEO,
4285     .id             = CODEC_ID_WMV1,
4286     .priv_data_size = sizeof(MpegEncContext),
4287     .init           = ff_MPV_encode_init,
4288     .encode2        = ff_MPV_encode_picture,
4289     .close          = ff_MPV_encode_end,
         /* PIX_FMT_NONE terminates the list */
4290     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4291     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4292     .priv_class     = &wmv1_class,
4293 };