]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge remote-tracking branch 'qatar/master'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "mpegvideo_common.h"
37 #include "h263.h"
38 #include "mjpegenc.h"
39 #include "msmpeg4.h"
40 #include "faandct.h"
41 #include "thread.h"
42 #include "aandcttab.h"
43 #include "flv.h"
44 #include "mpeg4video.h"
45 #include "internal.h"
46 #include "bytestream.h"
47 #include <limits.h>
48 #include "sp5x.h"
49
50 //#undef NDEBUG
51 //#include <assert.h>
52
53 static int encode_picture(MpegEncContext *s, int picture_number);
54 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
55 static int sse_mb(MpegEncContext *s);
56 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
57 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
58
59 /* enable all paranoid tests for rounding, overflows, etc... */
60 //#define PARANOID
61
62 //#define DEBUG
63
64 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
65 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
66
67 const AVOption ff_mpv_generic_options[] = {
68     FF_MPV_COMMON_OPTS
69     { NULL },
70 };
71
72 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
73                        uint16_t (*qmat16)[2][64],
74                        const uint16_t *quant_matrix,
75                        int bias, int qmin, int qmax, int intra)
76 {
77     int qscale;
78     int shift = 0;
79
80     for (qscale = qmin; qscale <= qmax; qscale++) {
81         int i;
82         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
83             dsp->fdct == ff_jpeg_fdct_islow_10
84 #ifdef FAAN_POSTSCALE
85             || dsp->fdct == ff_faandct
86 #endif
87             ) {
88             for (i = 0; i < 64; i++) {
89                 const int j = dsp->idct_permutation[i];
90                 /* 16 <= qscale * quant_matrix[i] <= 7905
91                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
92                  *             19952 <=              x  <= 249205026
93                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
94                  *           3444240 >= (1 << 36) / (x) >= 275 */
95
96                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
97                                         (qscale * quant_matrix[j]));
98             }
99         } else if (dsp->fdct == ff_fdct_ifast
100 #ifndef FAAN_POSTSCALE
101                    || dsp->fdct == ff_faandct
102 #endif
103                    ) {
104             for (i = 0; i < 64; i++) {
105                 const int j = dsp->idct_permutation[i];
106                 /* 16 <= qscale * quant_matrix[i] <= 7905
107                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
108                  *             19952 <=              x  <= 249205026
109                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
110                  *           3444240 >= (1 << 36) / (x) >= 275 */
111
112                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
113                                         (ff_aanscales[i] * qscale * quant_matrix[j]));
114             }
115         } else {
116             for (i = 0; i < 64; i++) {
117                 const int j = dsp->idct_permutation[i];
118                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
119                  * Assume x = qscale * quant_matrix[i]
120                  * So             16 <=              x  <= 7905
121                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
122                  * so          32768 >= (1 << 19) / (x) >= 67 */
123                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
124                                         (qscale * quant_matrix[j]));
125                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
126                 //                    (qscale * quant_matrix[i]);
127                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
128                                        (qscale * quant_matrix[j]);
129
130                 if (qmat16[qscale][0][i] == 0 ||
131                     qmat16[qscale][0][i] == 128 * 256)
132                     qmat16[qscale][0][i] = 128 * 256 - 1;
133                 qmat16[qscale][1][i] =
134                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
135                                 qmat16[qscale][0][i]);
136             }
137         }
138
139         for (i = intra; i < 64; i++) {
140             int64_t max = 8191;
141             if (dsp->fdct == ff_fdct_ifast
142 #ifndef FAAN_POSTSCALE
143                 || dsp->fdct == ff_faandct
144 #endif
145                ) {
146                 max = (8191LL * ff_aanscales[i]) >> 14;
147             }
148             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
149                 shift++;
150             }
151         }
152     }
153     if (shift) {
154         av_log(NULL, AV_LOG_INFO,
155                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
156                QMAT_SHIFT - shift);
157     }
158 }
159
160 static inline void update_qscale(MpegEncContext *s)
161 {
162     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
163                 (FF_LAMBDA_SHIFT + 7);
164     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
165
166     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
167                  FF_LAMBDA_SHIFT;
168 }
169
170 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
171 {
172     int i;
173
174     if (matrix) {
175         put_bits(pb, 1, 1);
176         for (i = 0; i < 64; i++) {
177             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
178         }
179     } else
180         put_bits(pb, 1, 0);
181 }
182
183 /**
184  * init s->current_picture.qscale_table from s->lambda_table
185  */
186 void ff_init_qscale_tab(MpegEncContext *s)
187 {
188     int8_t * const qscale_table = s->current_picture.f.qscale_table;
189     int i;
190
191     for (i = 0; i < s->mb_num; i++) {
192         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
193         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
194         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
195                                                   s->avctx->qmax);
196     }
197 }
198
199 static void copy_picture_attributes(MpegEncContext *s,
200                                     AVFrame *dst,
201                                     AVFrame *src)
202 {
203     int i;
204
205     dst->pict_type              = src->pict_type;
206     dst->quality                = src->quality;
207     dst->coded_picture_number   = src->coded_picture_number;
208     dst->display_picture_number = src->display_picture_number;
209     //dst->reference              = src->reference;
210     dst->pts                    = src->pts;
211     dst->interlaced_frame       = src->interlaced_frame;
212     dst->top_field_first        = src->top_field_first;
213
214     if (s->avctx->me_threshold) {
215         if (!src->motion_val[0])
216             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
217         if (!src->mb_type)
218             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
219         if (!src->ref_index[0])
220             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
221         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
222             av_log(s->avctx, AV_LOG_ERROR,
223                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
224                    src->motion_subsample_log2, dst->motion_subsample_log2);
225
226         memcpy(dst->mb_type, src->mb_type,
227                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
228
229         for (i = 0; i < 2; i++) {
230             int stride = ((16 * s->mb_width ) >>
231                           src->motion_subsample_log2) + 1;
232             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
233
234             if (src->motion_val[i] &&
235                 src->motion_val[i] != dst->motion_val[i]) {
236                 memcpy(dst->motion_val[i], src->motion_val[i],
237                        2 * stride * height * sizeof(int16_t));
238             }
239             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
240                 memcpy(dst->ref_index[i], src->ref_index[i],
241                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
242             }
243         }
244     }
245 }
246
247 static void update_duplicate_context_after_me(MpegEncContext *dst,
248                                               MpegEncContext *src)
249 {
250 #define COPY(a) dst->a= src->a
251     COPY(pict_type);
252     COPY(current_picture);
253     COPY(f_code);
254     COPY(b_code);
255     COPY(qscale);
256     COPY(lambda);
257     COPY(lambda2);
258     COPY(picture_in_gop_number);
259     COPY(gop_picture_number);
260     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
261     COPY(progressive_frame);    // FIXME don't set in encode_header
262     COPY(partitioned_frame);    // FIXME don't set in encode_header
263 #undef COPY
264 }
265
266 /**
267  * Set the given MpegEncContext to defaults for encoding.
268  * the changed fields will not depend upon the prior state of the MpegEncContext.
269  */
270 static void MPV_encode_defaults(MpegEncContext *s)
271 {
272     int i;
273     ff_MPV_common_defaults(s);
274
275     for (i = -16; i < 16; i++) {
276         default_fcode_tab[i + MAX_MV] = 1;
277     }
278     s->me.mv_penalty = default_mv_penalty;
279     s->fcode_tab     = default_fcode_tab;
280 }
281
282 /* init video encoder */
283 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
284 {
285     MpegEncContext *s = avctx->priv_data;
286     int i;
287     int chroma_h_shift, chroma_v_shift;
288
289     MPV_encode_defaults(s);
290
291     switch (avctx->codec_id) {
292     case CODEC_ID_MPEG2VIDEO:
293         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
294             avctx->pix_fmt != PIX_FMT_YUV422P) {
295             av_log(avctx, AV_LOG_ERROR,
296                    "only YUV420 and YUV422 are supported\n");
297             return -1;
298         }
299         break;
300     case CODEC_ID_LJPEG:
301         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
302             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
303             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
304             avctx->pix_fmt != PIX_FMT_BGR0     &&
305             avctx->pix_fmt != PIX_FMT_BGRA     &&
306             avctx->pix_fmt != PIX_FMT_BGR24    &&
307             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
308               avctx->pix_fmt != PIX_FMT_YUV422P &&
309               avctx->pix_fmt != PIX_FMT_YUV444P) ||
310              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
311             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
312             return -1;
313         }
314         break;
315     case CODEC_ID_MJPEG:
316     case CODEC_ID_AMV:
317         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
318             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
319             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
320               avctx->pix_fmt != PIX_FMT_YUV422P) ||
321              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
322             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
323             return -1;
324         }
325         break;
326     default:
327         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
328             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
329             return -1;
330         }
331     }
332
333     switch (avctx->pix_fmt) {
334     case PIX_FMT_YUVJ422P:
335     case PIX_FMT_YUV422P:
336         s->chroma_format = CHROMA_422;
337         break;
338     case PIX_FMT_YUVJ420P:
339     case PIX_FMT_YUV420P:
340     default:
341         s->chroma_format = CHROMA_420;
342         break;
343     }
344
345     s->bit_rate = avctx->bit_rate;
346     s->width    = avctx->width;
347     s->height   = avctx->height;
348     if (avctx->gop_size > 600 &&
349         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
350         av_log(avctx, AV_LOG_WARNING,
351                "keyframe interval too large!, reducing it from %d to %d\n",
352                avctx->gop_size, 600);
353         avctx->gop_size = 600;
354     }
355     s->gop_size     = avctx->gop_size;
356     s->avctx        = avctx;
357     s->flags        = avctx->flags;
358     s->flags2       = avctx->flags2;
359     s->max_b_frames = avctx->max_b_frames;
360     s->codec_id     = avctx->codec->id;
361 #if FF_API_MPV_GLOBAL_OPTS
362     if (avctx->luma_elim_threshold)
363         s->luma_elim_threshold   = avctx->luma_elim_threshold;
364     if (avctx->chroma_elim_threshold)
365         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
366 #endif
367     s->strict_std_compliance = avctx->strict_std_compliance;
368     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
369     s->mpeg_quant         = avctx->mpeg_quant;
370     s->rtp_mode           = !!avctx->rtp_payload_size;
371     s->intra_dc_precision = avctx->intra_dc_precision;
372     s->user_specified_pts = AV_NOPTS_VALUE;
373
374     if (s->gop_size <= 1) {
375         s->intra_only = 1;
376         s->gop_size   = 12;
377     } else {
378         s->intra_only = 0;
379     }
380
381     s->me_method = avctx->me_method;
382
383     /* Fixed QSCALE */
384     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
385
386 #if FF_API_MPV_GLOBAL_OPTS
387     if (s->flags & CODEC_FLAG_QP_RD)
388         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
389 #endif
390
391     s->adaptive_quant = (s->avctx->lumi_masking ||
392                          s->avctx->dark_masking ||
393                          s->avctx->temporal_cplx_masking ||
394                          s->avctx->spatial_cplx_masking  ||
395                          s->avctx->p_masking      ||
396                          s->avctx->border_masking ||
397                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
398                         !s->fixed_qscale;
399
400     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
401
402     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
403         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
404         return -1;
405     }
406
407     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
408         av_log(avctx, AV_LOG_INFO,
409                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
410     }
411
412     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
413         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
414         return -1;
415     }
416
417     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
418         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
419         return -1;
420     }
421
422     if (avctx->rc_max_rate &&
423         avctx->rc_max_rate == avctx->bit_rate &&
424         avctx->rc_max_rate != avctx->rc_min_rate) {
425         av_log(avctx, AV_LOG_INFO,
426                "impossible bitrate constraints, this will fail\n");
427     }
428
429     if (avctx->rc_buffer_size &&
430         avctx->bit_rate * (int64_t)avctx->time_base.num >
431             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
432         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
433         return -1;
434     }
435
436     if (!s->fixed_qscale &&
437         avctx->bit_rate * av_q2d(avctx->time_base) >
438             avctx->bit_rate_tolerance) {
439         av_log(avctx, AV_LOG_ERROR,
440                "bitrate tolerance too small for bitrate\n");
441         return -1;
442     }
443
444     if (s->avctx->rc_max_rate &&
445         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
446         (s->codec_id == CODEC_ID_MPEG1VIDEO ||
447          s->codec_id == CODEC_ID_MPEG2VIDEO) &&
448         90000LL * (avctx->rc_buffer_size - 1) >
449             s->avctx->rc_max_rate * 0xFFFFLL) {
450         av_log(avctx, AV_LOG_INFO,
451                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
452                "specified vbv buffer is too large for the given bitrate!\n");
453     }
454
455     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != CODEC_ID_MPEG4 &&
456         s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P &&
457         s->codec_id != CODEC_ID_FLV1) {
458         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
459         return -1;
460     }
461
462     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
463         av_log(avctx, AV_LOG_ERROR,
464                "OBMC is only supported with simple mb decision\n");
465         return -1;
466     }
467
468     if (s->quarter_sample && s->codec_id != CODEC_ID_MPEG4) {
469         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
470         return -1;
471     }
472
473     if (s->max_b_frames                    &&
474         s->codec_id != CODEC_ID_MPEG4      &&
475         s->codec_id != CODEC_ID_MPEG1VIDEO &&
476         s->codec_id != CODEC_ID_MPEG2VIDEO) {
477         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
478         return -1;
479     }
480
481     if ((s->codec_id == CODEC_ID_MPEG4 ||
482          s->codec_id == CODEC_ID_H263  ||
483          s->codec_id == CODEC_ID_H263P) &&
484         (avctx->sample_aspect_ratio.num > 255 ||
485          avctx->sample_aspect_ratio.den > 255)) {
486         av_log(avctx, AV_LOG_WARNING,
487                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
488                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
489         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
490                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
491     }
492
493     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
494         s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO) {
495         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
496         return -1;
497     }
498
499     // FIXME mpeg2 uses that too
500     if (s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4) {
501         av_log(avctx, AV_LOG_ERROR,
502                "mpeg2 style quantization not supported by codec\n");
503         return -1;
504     }
505
506 #if FF_API_MPV_GLOBAL_OPTS
507     if (s->flags & CODEC_FLAG_CBP_RD)
508         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
509 #endif
510
511     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
512         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
513         return -1;
514     }
515
516     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
517         s->avctx->mb_decision != FF_MB_DECISION_RD) {
518         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
519         return -1;
520     }
521
522     if (s->avctx->scenechange_threshold < 1000000000 &&
523         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
524         av_log(avctx, AV_LOG_ERROR,
525                "closed gop with scene change detection are not supported yet, "
526                "set threshold to 1000000000\n");
527         return -1;
528     }
529
530     if (s->flags & CODEC_FLAG_LOW_DELAY) {
531         if (s->codec_id != CODEC_ID_MPEG2VIDEO) {
532             av_log(avctx, AV_LOG_ERROR,
533                   "low delay forcing is only available for mpeg2\n");
534             return -1;
535         }
536         if (s->max_b_frames != 0) {
537             av_log(avctx, AV_LOG_ERROR,
538                    "b frames cannot be used with low delay\n");
539             return -1;
540         }
541     }
542
543     if (s->q_scale_type == 1) {
544         if (avctx->qmax > 12) {
545             av_log(avctx, AV_LOG_ERROR,
546                    "non linear quant only supports qmax <= 12 currently\n");
547             return -1;
548         }
549     }
550
551     if (s->avctx->thread_count > 1         &&
552         s->codec_id != CODEC_ID_MPEG4      &&
553         s->codec_id != CODEC_ID_MPEG1VIDEO &&
554         s->codec_id != CODEC_ID_MPEG2VIDEO &&
555         (s->codec_id != CODEC_ID_H263P)) {
556         av_log(avctx, AV_LOG_ERROR,
557                "multi threaded encoding not supported by codec\n");
558         return -1;
559     }
560
561     if (s->avctx->thread_count < 1) {
562         av_log(avctx, AV_LOG_ERROR,
563                "automatic thread number detection not supported by codec, "
564                "patch welcome\n");
565         return -1;
566     }
567
568     if (s->avctx->thread_count > 1)
569         s->rtp_mode = 1;
570
571     if (!avctx->time_base.den || !avctx->time_base.num) {
572         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
573         return -1;
574     }
575
576     i = (INT_MAX / 2 + 128) >> 8;
577     if (avctx->me_threshold >= i) {
578         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
579                i - 1);
580         return -1;
581     }
582     if (avctx->mb_threshold >= i) {
583         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
584                i - 1);
585         return -1;
586     }
587
588     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
589         av_log(avctx, AV_LOG_INFO,
590                "notice: b_frame_strategy only affects the first pass\n");
591         avctx->b_frame_strategy = 0;
592     }
593
594     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
595     if (i > 1) {
596         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
597         avctx->time_base.den /= i;
598         avctx->time_base.num /= i;
599         //return -1;
600     }
601
602     if (s->mpeg_quant || s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || s->codec_id == CODEC_ID_MJPEG || s->codec_id==CODEC_ID_AMV) {
603         // (a + x * 3 / 8) / x
604         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
605         s->inter_quant_bias = 0;
606     } else {
607         s->intra_quant_bias = 0;
608         // (a - x / 4) / x
609         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
610     }
611
612     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
613         s->intra_quant_bias = avctx->intra_quant_bias;
614     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
615         s->inter_quant_bias = avctx->inter_quant_bias;
616
617     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
618
619     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
620                                   &chroma_v_shift);
621
622     if (avctx->codec_id == CODEC_ID_MPEG4 &&
623         s->avctx->time_base.den > (1 << 16) - 1) {
624         av_log(avctx, AV_LOG_ERROR,
625                "timebase %d/%d not supported by MPEG 4 standard, "
626                "the maximum admitted value for the timebase denominator "
627                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
628                (1 << 16) - 1);
629         return -1;
630     }
631     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
632
633 #if FF_API_MPV_GLOBAL_OPTS
634     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
635         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
636     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
637         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
638     if (avctx->quantizer_noise_shaping)
639         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
640 #endif
641
642     switch (avctx->codec->id) {
643     case CODEC_ID_MPEG1VIDEO:
644         s->out_format = FMT_MPEG1;
645         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
646         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
647         break;
648     case CODEC_ID_MPEG2VIDEO:
649         s->out_format = FMT_MPEG1;
650         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
651         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
652         s->rtp_mode   = 1;
653         break;
654     case CODEC_ID_LJPEG:
655     case CODEC_ID_MJPEG:
656     case CODEC_ID_AMV:
657         s->out_format = FMT_MJPEG;
658         s->intra_only = 1; /* force intra only for jpeg */
659         if (avctx->codec->id == CODEC_ID_LJPEG &&
660             (avctx->pix_fmt == PIX_FMT_BGR0
661              || s->avctx->pix_fmt == PIX_FMT_BGRA
662              || s->avctx->pix_fmt == PIX_FMT_BGR24)) {
663             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
664             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
665             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
666         } else {
667             s->mjpeg_vsample[0] = 2;
668             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
669             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
670             s->mjpeg_hsample[0] = 2;
671             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
672             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
673         }
674         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
675             ff_mjpeg_encode_init(s) < 0)
676             return -1;
677         avctx->delay = 0;
678         s->low_delay = 1;
679         break;
680     case CODEC_ID_H261:
681         if (!CONFIG_H261_ENCODER)
682             return -1;
683         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
684             av_log(avctx, AV_LOG_ERROR,
685                    "The specified picture size of %dx%d is not valid for the "
686                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
687                     s->width, s->height);
688             return -1;
689         }
690         s->out_format = FMT_H261;
691         avctx->delay  = 0;
692         s->low_delay  = 1;
693         break;
694     case CODEC_ID_H263:
695         if (!CONFIG_H263_ENCODER)
696             return -1;
697         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
698                              s->width, s->height) == 8) {
699             av_log(avctx, AV_LOG_ERROR,
700                    "The specified picture size of %dx%d is not valid for "
701                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
702                    "352x288, 704x576, and 1408x1152. "
703                    "Try H.263+.\n", s->width, s->height);
704             return -1;
705         }
706         s->out_format = FMT_H263;
707         avctx->delay  = 0;
708         s->low_delay  = 1;
709         break;
710     case CODEC_ID_H263P:
711         s->out_format = FMT_H263;
712         s->h263_plus  = 1;
713         /* Fx */
714         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
715         s->modified_quant  = s->h263_aic;
716         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
717         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
718
719         /* /Fx */
720         /* These are just to be sure */
721         avctx->delay = 0;
722         s->low_delay = 1;
723         break;
724     case CODEC_ID_FLV1:
725         s->out_format      = FMT_H263;
726         s->h263_flv        = 2; /* format = 1; 11-bit codes */
727         s->unrestricted_mv = 1;
728         s->rtp_mode  = 0; /* don't allow GOB */
729         avctx->delay = 0;
730         s->low_delay = 1;
731         break;
732     case CODEC_ID_RV10:
733         s->out_format = FMT_H263;
734         avctx->delay  = 0;
735         s->low_delay  = 1;
736         break;
737     case CODEC_ID_RV20:
738         s->out_format      = FMT_H263;
739         avctx->delay       = 0;
740         s->low_delay       = 1;
741         s->modified_quant  = 1;
742         s->h263_aic        = 1;
743         s->h263_plus       = 1;
744         s->loop_filter     = 1;
745         s->unrestricted_mv = 0;
746         break;
747     case CODEC_ID_MPEG4:
748         s->out_format      = FMT_H263;
749         s->h263_pred       = 1;
750         s->unrestricted_mv = 1;
751         s->low_delay       = s->max_b_frames ? 0 : 1;
752         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
753         break;
754     case CODEC_ID_MSMPEG4V2:
755         s->out_format      = FMT_H263;
756         s->h263_pred       = 1;
757         s->unrestricted_mv = 1;
758         s->msmpeg4_version = 2;
759         avctx->delay       = 0;
760         s->low_delay       = 1;
761         break;
762     case CODEC_ID_MSMPEG4V3:
763         s->out_format        = FMT_H263;
764         s->h263_pred         = 1;
765         s->unrestricted_mv   = 1;
766         s->msmpeg4_version   = 3;
767         s->flipflop_rounding = 1;
768         avctx->delay         = 0;
769         s->low_delay         = 1;
770         break;
771     case CODEC_ID_WMV1:
772         s->out_format        = FMT_H263;
773         s->h263_pred         = 1;
774         s->unrestricted_mv   = 1;
775         s->msmpeg4_version   = 4;
776         s->flipflop_rounding = 1;
777         avctx->delay         = 0;
778         s->low_delay         = 1;
779         break;
780     case CODEC_ID_WMV2:
781         s->out_format        = FMT_H263;
782         s->h263_pred         = 1;
783         s->unrestricted_mv   = 1;
784         s->msmpeg4_version   = 5;
785         s->flipflop_rounding = 1;
786         avctx->delay         = 0;
787         s->low_delay         = 1;
788         break;
789     default:
790         return -1;
791     }
792
793     avctx->has_b_frames = !s->low_delay;
794
795     s->encoding = 1;
796
797     s->progressive_frame    =
798     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
799                                                 CODEC_FLAG_INTERLACED_ME) ||
800                                 s->alternate_scan);
801
802     /* init */
803     if (ff_MPV_common_init(s) < 0)
804         return -1;
805
806     if (!s->dct_quantize)
807         s->dct_quantize = ff_dct_quantize_c;
808     if (!s->denoise_dct)
809         s->denoise_dct  = denoise_dct_c;
810     s->fast_dct_quantize = s->dct_quantize;
811     if (avctx->trellis)
812         s->dct_quantize  = dct_quantize_trellis_c;
813
814     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
815         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
816
817     s->quant_precision = 5;
818
819     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
820     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
821
822     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
823         ff_h261_encode_init(s);
824     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
825         ff_h263_encode_init(s);
826     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
827         ff_msmpeg4_encode_init(s);
828     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
829         && s->out_format == FMT_MPEG1)
830         ff_mpeg1_encode_init(s);
831
832     /* init q matrix */
833     for (i = 0; i < 64; i++) {
834         int j = s->dsp.idct_permutation[i];
835         if (CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4 &&
836             s->mpeg_quant) {
837             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
838             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
839         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
840             s->intra_matrix[j] =
841             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
842         } else {
843             /* mpeg1/2 */
844             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
845             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
846         }
847         if (s->avctx->intra_matrix)
848             s->intra_matrix[j] = s->avctx->intra_matrix[i];
849         if (s->avctx->inter_matrix)
850             s->inter_matrix[j] = s->avctx->inter_matrix[i];
851     }
852
853     /* precompute matrix */
854     /* for mjpeg, we do include qscale in the matrix */
855     if (s->out_format != FMT_MJPEG) {
856         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
857                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
858                           31, 1);
859         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
860                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
861                           31, 0);
862     }
863
864     if (ff_rate_control_init(s) < 0)
865         return -1;
866
867     return 0;
868 }
869
870 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
871 {
872     MpegEncContext *s = avctx->priv_data;
873
874     ff_rate_control_uninit(s);
875
876     ff_MPV_common_end(s);
877     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
878         s->out_format == FMT_MJPEG)
879         ff_mjpeg_encode_close(s);
880
881     av_freep(&avctx->extradata);
882
883     return 0;
884 }
885
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared with
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff < 0 ? -diff : diff;
        }
    }

    return total;
}
899
900 static int get_intra_count(MpegEncContext *s, uint8_t *src,
901                            uint8_t *ref, int stride)
902 {
903     int x, y, w, h;
904     int acc = 0;
905
906     w = s->width  & ~15;
907     h = s->height & ~15;
908
909     for (y = 0; y < h; y += 16) {
910         for (x = 0; x < w; x += 16) {
911             int offset = x + y * stride;
912             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
913                                      16);
914             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
915             int sae  = get_sae(src + offset, mean, stride);
916
917             acc += sae + 500 < sad;
918         }
919     }
920     return acc;
921 }
922
923
924 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
925 {
926     AVFrame *pic = NULL;
927     int64_t pts;
928     int i;
929     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
930                                                  (s->low_delay ? 0 : 1);
931     int direct = 1;
932
933     if (pic_arg) {
934         pts = pic_arg->pts;
935         pic_arg->display_picture_number = s->input_picture_number++;
936
937         if (pts != AV_NOPTS_VALUE) {
938             if (s->user_specified_pts != AV_NOPTS_VALUE) {
939                 int64_t time = pts;
940                 int64_t last = s->user_specified_pts;
941
942                 if (time <= last) {
943                     av_log(s->avctx, AV_LOG_ERROR,
944                            "Error, Invalid timestamp=%"PRId64", "
945                            "last=%"PRId64"\n", pts, s->user_specified_pts);
946                     return -1;
947                 }
948
949                 if (!s->low_delay && pic_arg->display_picture_number == 1)
950                     s->dts_delta = time - last;
951             }
952             s->user_specified_pts = pts;
953         } else {
954             if (s->user_specified_pts != AV_NOPTS_VALUE) {
955                 s->user_specified_pts =
956                 pts = s->user_specified_pts + 1;
957                 av_log(s->avctx, AV_LOG_INFO,
958                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
959                        pts);
960             } else {
961                 pts = pic_arg->display_picture_number;
962             }
963         }
964     }
965
966   if (pic_arg) {
967     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
968         direct = 0;
969     if (pic_arg->linesize[0] != s->linesize)
970         direct = 0;
971     if (pic_arg->linesize[1] != s->uvlinesize)
972         direct = 0;
973     if (pic_arg->linesize[2] != s->uvlinesize)
974         direct = 0;
975
976     //av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0],
977     //       pic_arg->linesize[1], s->linesize, s->uvlinesize);
978
979     if (direct) {
980         i = ff_find_unused_picture(s, 1);
981         if (i < 0)
982             return i;
983
984         pic = &s->picture[i].f;
985         pic->reference = 3;
986
987         for (i = 0; i < 4; i++) {
988             pic->data[i]     = pic_arg->data[i];
989             pic->linesize[i] = pic_arg->linesize[i];
990         }
991         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
992             return -1;
993         }
994     } else {
995         i = ff_find_unused_picture(s, 0);
996         if (i < 0)
997             return i;
998
999         pic = &s->picture[i].f;
1000         pic->reference = 3;
1001
1002         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
1003             return -1;
1004         }
1005
1006         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1007             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1008             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1009             // empty
1010         } else {
1011             int h_chroma_shift, v_chroma_shift;
1012             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
1013                                           &v_chroma_shift);
1014
1015             for (i = 0; i < 3; i++) {
1016                 int src_stride = pic_arg->linesize[i];
1017                 int dst_stride = i ? s->uvlinesize : s->linesize;
1018                 int h_shift = i ? h_chroma_shift : 0;
1019                 int v_shift = i ? v_chroma_shift : 0;
1020                 int w = s->width  >> h_shift;
1021                 int h = s->height >> v_shift;
1022                 uint8_t *src = pic_arg->data[i];
1023                 uint8_t *dst = pic->data[i];
1024
1025                 if(s->codec_id == CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)){
1026                     h= ((s->height+15)/16*16)>>v_shift;
1027                 }
1028
1029                 if (!s->avctx->rc_buffer_size)
1030                     dst += INPLACE_OFFSET;
1031
1032                 if (src_stride == dst_stride)
1033                     memcpy(dst, src, src_stride * h);
1034                 else {
1035                     while (h--) {
1036                         memcpy(dst, src, w);
1037                         dst += dst_stride;
1038                         src += src_stride;
1039                     }
1040                 }
1041             }
1042         }
1043     }
1044     copy_picture_attributes(s, pic, pic_arg);
1045     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1046   }
1047
1048     /* shift buffer entries */
1049     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1050         s->input_picture[i - 1] = s->input_picture[i];
1051
1052     s->input_picture[encoding_delay] = (Picture*) pic;
1053
1054     return 0;
1055 }
1056
1057 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1058 {
1059     int x, y, plane;
1060     int score = 0;
1061     int64_t score64 = 0;
1062
1063     for (plane = 0; plane < 3; plane++) {
1064         const int stride = p->f.linesize[plane];
1065         const int bw = plane ? 1 : 2;
1066         for (y = 0; y < s->mb_height * bw; y++) {
1067             for (x = 0; x < s->mb_width * bw; x++) {
1068                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1069                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1070                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1071                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1072
1073                 switch (s->avctx->frame_skip_exp) {
1074                 case 0: score    =  FFMAX(score, v);          break;
1075                 case 1: score   += FFABS(v);                  break;
1076                 case 2: score   += v * v;                     break;
1077                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1078                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1079                 }
1080             }
1081         }
1082     }
1083
1084     if (score)
1085         score64 = score;
1086
1087     if (score64 < s->avctx->frame_skip_threshold)
1088         return 1;
1089     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1090         return 1;
1091     return 0;
1092 }
1093
1094 static int estimate_best_b_count(MpegEncContext *s)
1095 {
1096     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1097     AVCodecContext *c = avcodec_alloc_context3(NULL);
1098     AVFrame input[FF_MAX_B_FRAMES + 2];
1099     const int scale = s->avctx->brd_scale;
1100     int i, j, out_size, p_lambda, b_lambda, lambda2;
1101     int outbuf_size  = s->width * s->height; // FIXME
1102     uint8_t *outbuf  = av_malloc(outbuf_size);
1103     int64_t best_rd  = INT64_MAX;
1104     int best_b_count = -1;
1105
1106     assert(scale >= 0 && scale <= 3);
1107
1108     //emms_c();
1109     //s->next_picture_ptr->quality;
1110     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1111     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1112     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1113     if (!b_lambda) // FIXME we should do this somewhere else
1114         b_lambda = p_lambda;
1115     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1116                FF_LAMBDA_SHIFT;
1117
1118     c->width        = s->width  >> scale;
1119     c->height       = s->height >> scale;
1120     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1121                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1122     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1123     c->mb_decision  = s->avctx->mb_decision;
1124     c->me_cmp       = s->avctx->me_cmp;
1125     c->mb_cmp       = s->avctx->mb_cmp;
1126     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1127     c->pix_fmt      = PIX_FMT_YUV420P;
1128     c->time_base    = s->avctx->time_base;
1129     c->max_b_frames = s->max_b_frames;
1130
1131     if (avcodec_open2(c, codec, NULL) < 0)
1132         return -1;
1133
1134     for (i = 0; i < s->max_b_frames + 2; i++) {
1135         int ysize = c->width * c->height;
1136         int csize = (c->width / 2) * (c->height / 2);
1137         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1138                                                 s->next_picture_ptr;
1139
1140         avcodec_get_frame_defaults(&input[i]);
1141         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1142         input[i].data[1]     = input[i].data[0] + ysize;
1143         input[i].data[2]     = input[i].data[1] + csize;
1144         input[i].linesize[0] = c->width;
1145         input[i].linesize[1] =
1146         input[i].linesize[2] = c->width / 2;
1147
1148         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1149             pre_input = *pre_input_ptr;
1150
1151             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1152                 pre_input.f.data[0] += INPLACE_OFFSET;
1153                 pre_input.f.data[1] += INPLACE_OFFSET;
1154                 pre_input.f.data[2] += INPLACE_OFFSET;
1155             }
1156
1157             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1158                                  pre_input.f.data[0], pre_input.f.linesize[0],
1159                                  c->width,      c->height);
1160             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1161                                  pre_input.f.data[1], pre_input.f.linesize[1],
1162                                  c->width >> 1, c->height >> 1);
1163             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1164                                  pre_input.f.data[2], pre_input.f.linesize[2],
1165                                  c->width >> 1, c->height >> 1);
1166         }
1167     }
1168
1169     for (j = 0; j < s->max_b_frames + 1; j++) {
1170         int64_t rd = 0;
1171
1172         if (!s->input_picture[j])
1173             break;
1174
1175         c->error[0] = c->error[1] = c->error[2] = 0;
1176
1177         input[0].pict_type = AV_PICTURE_TYPE_I;
1178         input[0].quality   = 1 * FF_QP2LAMBDA;
1179         out_size           = avcodec_encode_video(c, outbuf,
1180                                                   outbuf_size, &input[0]);
1181         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1182
1183         for (i = 0; i < s->max_b_frames + 1; i++) {
1184             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1185
1186             input[i + 1].pict_type = is_p ?
1187                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1188             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1189             out_size = avcodec_encode_video(c, outbuf, outbuf_size,
1190                                             &input[i + 1]);
1191             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1192         }
1193
1194         /* get the delayed frames */
1195         while (out_size) {
1196             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
1197             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1198         }
1199
1200         rd += c->error[0] + c->error[1] + c->error[2];
1201
1202         if (rd < best_rd) {
1203             best_rd = rd;
1204             best_b_count = j;
1205         }
1206     }
1207
1208     av_freep(&outbuf);
1209     avcodec_close(c);
1210     av_freep(&c);
1211
1212     for (i = 0; i < s->max_b_frames + 2; i++) {
1213         av_freep(&input[i].data[0]);
1214     }
1215
1216     return best_b_count;
1217 }
1218
1219 static int select_input_picture(MpegEncContext *s)
1220 {
1221     int i;
1222
1223     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1224         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1225     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1226
1227     /* set next picture type & ordering */
1228     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1229         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1230             s->next_picture_ptr == NULL || s->intra_only) {
1231             s->reordered_input_picture[0] = s->input_picture[0];
1232             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1233             s->reordered_input_picture[0]->f.coded_picture_number =
1234                 s->coded_picture_number++;
1235         } else {
1236             int b_frames;
1237
1238             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1239                 if (s->picture_in_gop_number < s->gop_size &&
1240                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1241                     // FIXME check that te gop check above is +-1 correct
1242                     //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n",
1243                     //       s->input_picture[0]->f.data[0],
1244                     //       s->input_picture[0]->pts);
1245
1246                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1247                         for (i = 0; i < 4; i++)
1248                             s->input_picture[0]->f.data[i] = NULL;
1249                         s->input_picture[0]->f.type = 0;
1250                     } else {
1251                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1252                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1253
1254                         s->avctx->release_buffer(s->avctx,
1255                                                  &s->input_picture[0]->f);
1256                     }
1257
1258                     emms_c();
1259                     ff_vbv_update(s, 0);
1260
1261                     goto no_output_pic;
1262                 }
1263             }
1264
1265             if (s->flags & CODEC_FLAG_PASS2) {
1266                 for (i = 0; i < s->max_b_frames + 1; i++) {
1267                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1268
1269                     if (pict_num >= s->rc_context.num_entries)
1270                         break;
1271                     if (!s->input_picture[i]) {
1272                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1273                         break;
1274                     }
1275
1276                     s->input_picture[i]->f.pict_type =
1277                         s->rc_context.entry[pict_num].new_pict_type;
1278                 }
1279             }
1280
1281             if (s->avctx->b_frame_strategy == 0) {
1282                 b_frames = s->max_b_frames;
1283                 while (b_frames && !s->input_picture[b_frames])
1284                     b_frames--;
1285             } else if (s->avctx->b_frame_strategy == 1) {
1286                 for (i = 1; i < s->max_b_frames + 1; i++) {
1287                     if (s->input_picture[i] &&
1288                         s->input_picture[i]->b_frame_score == 0) {
1289                         s->input_picture[i]->b_frame_score =
1290                             get_intra_count(s,
1291                                             s->input_picture[i    ]->f.data[0],
1292                                             s->input_picture[i - 1]->f.data[0],
1293                                             s->linesize) + 1;
1294                     }
1295                 }
1296                 for (i = 0; i < s->max_b_frames + 1; i++) {
1297                     if (s->input_picture[i] == NULL ||
1298                         s->input_picture[i]->b_frame_score - 1 >
1299                             s->mb_num / s->avctx->b_sensitivity)
1300                         break;
1301                 }
1302
1303                 b_frames = FFMAX(0, i - 1);
1304
1305                 /* reset scores */
1306                 for (i = 0; i < b_frames + 1; i++) {
1307                     s->input_picture[i]->b_frame_score = 0;
1308                 }
1309             } else if (s->avctx->b_frame_strategy == 2) {
1310                 b_frames = estimate_best_b_count(s);
1311             } else {
1312                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1313                 b_frames = 0;
1314             }
1315
1316             emms_c();
1317             //static int b_count = 0;
1318             //b_count += b_frames;
1319             //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1320
1321             for (i = b_frames - 1; i >= 0; i--) {
1322                 int type = s->input_picture[i]->f.pict_type;
1323                 if (type && type != AV_PICTURE_TYPE_B)
1324                     b_frames = i;
1325             }
1326             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1327                 b_frames == s->max_b_frames) {
1328                 av_log(s->avctx, AV_LOG_ERROR,
1329                        "warning, too many b frames in a row\n");
1330             }
1331
1332             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1333                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1334                     s->gop_size > s->picture_in_gop_number) {
1335                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1336                 } else {
1337                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1338                         b_frames = 0;
1339                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1340                 }
1341             }
1342
1343             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1344                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1345                 b_frames--;
1346
1347             s->reordered_input_picture[0] = s->input_picture[b_frames];
1348             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1349                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1350             s->reordered_input_picture[0]->f.coded_picture_number =
1351                 s->coded_picture_number++;
1352             for (i = 0; i < b_frames; i++) {
1353                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1354                 s->reordered_input_picture[i + 1]->f.pict_type =
1355                     AV_PICTURE_TYPE_B;
1356                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1357                     s->coded_picture_number++;
1358             }
1359         }
1360     }
1361 no_output_pic:
1362     if (s->reordered_input_picture[0]) {
1363         s->reordered_input_picture[0]->f.reference =
1364            s->reordered_input_picture[0]->f.pict_type !=
1365                AV_PICTURE_TYPE_B ? 3 : 0;
1366
1367         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1368
1369         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1370             s->avctx->rc_buffer_size) {
1371             // input is a shared pix, so we can't modifiy it -> alloc a new
1372             // one & ensure that the shared one is reuseable
1373
1374             Picture *pic;
1375             int i = ff_find_unused_picture(s, 0);
1376             if (i < 0)
1377                 return i;
1378             pic = &s->picture[i];
1379
1380             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1381             if (ff_alloc_picture(s, pic, 0) < 0) {
1382                 return -1;
1383             }
1384
1385             /* mark us unused / free shared pic */
1386             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1387                 s->avctx->release_buffer(s->avctx,
1388                                          &s->reordered_input_picture[0]->f);
1389             for (i = 0; i < 4; i++)
1390                 s->reordered_input_picture[0]->f.data[i] = NULL;
1391             s->reordered_input_picture[0]->f.type = 0;
1392
1393             copy_picture_attributes(s, &pic->f,
1394                                     &s->reordered_input_picture[0]->f);
1395
1396             s->current_picture_ptr = pic;
1397         } else {
1398             // input is not a shared pix -> reuse buffer for current_pix
1399
1400             assert(s->reordered_input_picture[0]->f.type ==
1401                        FF_BUFFER_TYPE_USER ||
1402                    s->reordered_input_picture[0]->f.type ==
1403                        FF_BUFFER_TYPE_INTERNAL);
1404
1405             s->current_picture_ptr = s->reordered_input_picture[0];
1406             for (i = 0; i < 4; i++) {
1407                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1408             }
1409         }
1410         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1411
1412         s->picture_number = s->new_picture.f.display_picture_number;
1413         //printf("dpn:%d\n", s->picture_number);
1414     } else {
1415         memset(&s->new_picture, 0, sizeof(Picture));
1416     }
1417     return 0;
1418 }
1419
1420 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1421                           AVFrame *pic_arg, int *got_packet)
1422 {
1423     MpegEncContext *s = avctx->priv_data;
1424     int i, stuffing_count, ret;
1425     int context_count = s->slice_context_count;
1426
1427     s->picture_in_gop_number++;
1428
1429     if (load_input_picture(s, pic_arg) < 0)
1430         return -1;
1431
1432     if (select_input_picture(s) < 0) {
1433         return -1;
1434     }
1435
1436     /* output? */
1437     if (s->new_picture.f.data[0]) {
1438         if (!pkt->data &&
1439             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1440             return ret;
1441         if (s->mb_info) {
1442             s->mb_info_ptr = av_packet_new_side_data(pkt,
1443                                  AV_PKT_DATA_H263_MB_INFO,
1444                                  s->mb_width*s->mb_height*12);
1445             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1446         }
1447
1448         for (i = 0; i < context_count; i++) {
1449             int start_y = s->thread_context[i]->start_mb_y;
1450             int   end_y = s->thread_context[i]->  end_mb_y;
1451             int h       = s->mb_height;
1452             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1453             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1454
1455             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1456         }
1457
1458         s->pict_type = s->new_picture.f.pict_type;
1459         //emms_c();
1460         //printf("qs:%f %f %d\n", s->new_picture.quality,
1461         //       s->current_picture.quality, s->qscale);
1462         ff_MPV_frame_start(s, avctx);
1463 vbv_retry:
1464         if (encode_picture(s, s->picture_number) < 0)
1465             return -1;
1466
1467         avctx->header_bits = s->header_bits;
1468         avctx->mv_bits     = s->mv_bits;
1469         avctx->misc_bits   = s->misc_bits;
1470         avctx->i_tex_bits  = s->i_tex_bits;
1471         avctx->p_tex_bits  = s->p_tex_bits;
1472         avctx->i_count     = s->i_count;
1473         // FIXME f/b_count in avctx
1474         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1475         avctx->skip_count  = s->skip_count;
1476
1477         ff_MPV_frame_end(s);
1478
1479         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1480             ff_mjpeg_encode_picture_trailer(s);
1481
1482         if (avctx->rc_buffer_size) {
1483             RateControlContext *rcc = &s->rc_context;
1484             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1485
1486             if (put_bits_count(&s->pb) > max_size &&
1487                 s->lambda < s->avctx->lmax) {
1488                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1489                                        (s->qscale + 1) / s->qscale);
1490                 if (s->adaptive_quant) {
1491                     int i;
1492                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1493                         s->lambda_table[i] =
1494                             FFMAX(s->lambda_table[i] + 1,
1495                                   s->lambda_table[i] * (s->qscale + 1) /
1496                                   s->qscale);
1497                 }
1498                 s->mb_skipped = 0;        // done in MPV_frame_start()
1499                 // done in encode_picture() so we must undo it
1500                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1501                     if (s->flipflop_rounding          ||
1502                         s->codec_id == CODEC_ID_H263P ||
1503                         s->codec_id == CODEC_ID_MPEG4)
1504                         s->no_rounding ^= 1;
1505                 }
1506                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1507                     s->time_base       = s->last_time_base;
1508                     s->last_non_b_time = s->time - s->pp_time;
1509                 }
1510                 //av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
1511                 for (i = 0; i < context_count; i++) {
1512                     PutBitContext *pb = &s->thread_context[i]->pb;
1513                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1514                 }
1515                 goto vbv_retry;
1516             }
1517
1518             assert(s->avctx->rc_max_rate);
1519         }
1520
1521         if (s->flags & CODEC_FLAG_PASS1)
1522             ff_write_pass1_stats(s);
1523
1524         for (i = 0; i < 4; i++) {
1525             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1526             avctx->error[i] += s->current_picture_ptr->f.error[i];
1527         }
1528
1529         if (s->flags & CODEC_FLAG_PASS1)
1530             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1531                    avctx->i_tex_bits + avctx->p_tex_bits ==
1532                        put_bits_count(&s->pb));
1533         flush_put_bits(&s->pb);
1534         s->frame_bits  = put_bits_count(&s->pb);
1535
1536         stuffing_count = ff_vbv_update(s, s->frame_bits);
1537         if (stuffing_count) {
1538             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1539                     stuffing_count + 50) {
1540                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1541                 return -1;
1542             }
1543
1544             switch (s->codec_id) {
1545             case CODEC_ID_MPEG1VIDEO:
1546             case CODEC_ID_MPEG2VIDEO:
1547                 while (stuffing_count--) {
1548                     put_bits(&s->pb, 8, 0);
1549                 }
1550             break;
1551             case CODEC_ID_MPEG4:
1552                 put_bits(&s->pb, 16, 0);
1553                 put_bits(&s->pb, 16, 0x1C3);
1554                 stuffing_count -= 4;
1555                 while (stuffing_count--) {
1556                     put_bits(&s->pb, 8, 0xFF);
1557                 }
1558             break;
1559             default:
1560                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1561             }
1562             flush_put_bits(&s->pb);
1563             s->frame_bits  = put_bits_count(&s->pb);
1564         }
1565
1566         /* update mpeg1/2 vbv_delay for CBR */
1567         if (s->avctx->rc_max_rate                          &&
1568             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1569             s->out_format == FMT_MPEG1                     &&
1570             90000LL * (avctx->rc_buffer_size - 1) <=
1571                 s->avctx->rc_max_rate * 0xFFFFLL) {
1572             int vbv_delay, min_delay;
1573             double inbits  = s->avctx->rc_max_rate *
1574                              av_q2d(s->avctx->time_base);
1575             int    minbits = s->frame_bits - 8 *
1576                              (s->vbv_delay_ptr - s->pb.buf - 1);
1577             double bits    = s->rc_context.buffer_index + minbits - inbits;
1578
1579             if (bits < 0)
1580                 av_log(s->avctx, AV_LOG_ERROR,
1581                        "Internal error, negative bits\n");
1582
1583             assert(s->repeat_first_field == 0);
1584
1585             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1586             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1587                         s->avctx->rc_max_rate;
1588
1589             vbv_delay = FFMAX(vbv_delay, min_delay);
1590
1591             assert(vbv_delay < 0xFFFF);
1592
1593             s->vbv_delay_ptr[0] &= 0xF8;
1594             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1595             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1596             s->vbv_delay_ptr[2] &= 0x07;
1597             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1598             avctx->vbv_delay     = vbv_delay * 300;
1599         }
1600         s->total_bits     += s->frame_bits;
1601         avctx->frame_bits  = s->frame_bits;
1602
1603         pkt->pts = s->current_picture.f.pts;
1604         if (!s->low_delay) {
1605             if (!s->current_picture.f.coded_picture_number)
1606                 pkt->dts = pkt->pts - s->dts_delta;
1607             else
1608                 pkt->dts = s->reordered_pts;
1609             s->reordered_pts = s->input_picture[0]->f.pts;
1610         } else
1611             pkt->dts = pkt->pts;
1612         if (s->current_picture.f.key_frame)
1613             pkt->flags |= AV_PKT_FLAG_KEY;
1614         if (s->mb_info)
1615             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1616     } else {
1617         assert((put_bits_ptr(&s->pb) == s->pb.buf));
1618         s->frame_bits = 0;
1619     }
1620     assert((s->frame_bits & 7) == 0);
1621
1622     pkt->size = s->frame_bits / 8;
1623     *got_packet = !!pkt->size;
1624     return 0;
1625 }
1626
1627 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1628                                                 int n, int threshold)
1629 {
1630     static const char tab[64] = {
1631         3, 2, 2, 1, 1, 1, 1, 1,
1632         1, 1, 1, 1, 1, 1, 1, 1,
1633         1, 1, 1, 1, 1, 1, 1, 1,
1634         0, 0, 0, 0, 0, 0, 0, 0,
1635         0, 0, 0, 0, 0, 0, 0, 0,
1636         0, 0, 0, 0, 0, 0, 0, 0,
1637         0, 0, 0, 0, 0, 0, 0, 0,
1638         0, 0, 0, 0, 0, 0, 0, 0
1639     };
1640     int score = 0;
1641     int run = 0;
1642     int i;
1643     DCTELEM *block = s->block[n];
1644     const int last_index = s->block_last_index[n];
1645     int skip_dc;
1646
1647     if (threshold < 0) {
1648         skip_dc = 0;
1649         threshold = -threshold;
1650     } else
1651         skip_dc = 1;
1652
1653     /* Are all we could set to zero already zero? */
1654     if (last_index <= skip_dc - 1)
1655         return;
1656
1657     for (i = 0; i <= last_index; i++) {
1658         const int j = s->intra_scantable.permutated[i];
1659         const int level = FFABS(block[j]);
1660         if (level == 1) {
1661             if (skip_dc && i == 0)
1662                 continue;
1663             score += tab[run];
1664             run = 0;
1665         } else if (level > 1) {
1666             return;
1667         } else {
1668             run++;
1669         }
1670     }
1671     if (score >= threshold)
1672         return;
1673     for (i = skip_dc; i <= last_index; i++) {
1674         const int j = s->intra_scantable.permutated[i];
1675         block[j] = 0;
1676     }
1677     if (block[0])
1678         s->block_last_index[n] = 0;
1679     else
1680         s->block_last_index[n] = -1;
1681 }
1682
1683 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1684                                int last_index)
1685 {
1686     int i;
1687     const int maxlevel = s->max_qcoeff;
1688     const int minlevel = s->min_qcoeff;
1689     int overflow = 0;
1690
1691     if (s->mb_intra) {
1692         i = 1; // skip clipping of intra dc
1693     } else
1694         i = 0;
1695
1696     for (; i <= last_index; i++) {
1697         const int j = s->intra_scantable.permutated[i];
1698         int level = block[j];
1699
1700         if (level > maxlevel) {
1701             level = maxlevel;
1702             overflow++;
1703         } else if (level < minlevel) {
1704             level = minlevel;
1705             overflow++;
1706         }
1707
1708         block[j] = level;
1709     }
1710
1711     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1712         av_log(s->avctx, AV_LOG_INFO,
1713                "warning, clipping %d dct coefficients to %d..%d\n",
1714                overflow, minlevel, maxlevel);
1715 }
1716
/**
 * Compute a per-pixel weight for an 8x8 block from local pixel statistics.
 *
 * For every pixel the sum and the sum of squares are gathered over its
 * neighbourhood (up to 3x3, cropped at the block border) and the weight is
 * 36 * ff_sqrt(count * sqr - sum * sum) / count.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left    = FFMAX(col - 1, 0);
            const int right   = FFMIN(8, col + 2);
            /* number of pixels in the cropped neighbourhood */
            const int samples = (bottom - top) * (right - left);
            int total    = 0;
            int total_sq = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int pix = ptr[nx + ny * stride];

                    total    += pix;
                    total_sq += pix * pix;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * total_sq - total * total)) / samples;
        }
    }
}
1740
1741 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1742                                                 int motion_x, int motion_y,
1743                                                 int mb_block_height,
1744                                                 int mb_block_count)
1745 {
1746     int16_t weight[8][64];
1747     DCTELEM orig[8][64];
1748     const int mb_x = s->mb_x;
1749     const int mb_y = s->mb_y;
1750     int i;
1751     int skip_dct[8];
1752     int dct_offset = s->linesize * 8; // default for progressive frames
1753     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1754     int wrap_y, wrap_c;
1755
1756     for (i = 0; i < mb_block_count; i++)
1757         skip_dct[i] = s->skipdct;
1758
1759     if (s->adaptive_quant) {
1760         const int last_qp = s->qscale;
1761         const int mb_xy = mb_x + mb_y * s->mb_stride;
1762
1763         s->lambda = s->lambda_table[mb_xy];
1764         update_qscale(s);
1765
1766         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1767             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1768             s->dquant = s->qscale - last_qp;
1769
1770             if (s->out_format == FMT_H263) {
1771                 s->dquant = av_clip(s->dquant, -2, 2);
1772
1773                 if (s->codec_id == CODEC_ID_MPEG4) {
1774                     if (!s->mb_intra) {
1775                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1776                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1777                                 s->dquant = 0;
1778                         }
1779                         if (s->mv_type == MV_TYPE_8X8)
1780                             s->dquant = 0;
1781                     }
1782                 }
1783             }
1784         }
1785         ff_set_qscale(s, last_qp + s->dquant);
1786     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1787         ff_set_qscale(s, s->qscale + s->dquant);
1788
1789     wrap_y = s->linesize;
1790     wrap_c = s->uvlinesize;
1791     ptr_y  = s->new_picture.f.data[0] +
1792              (mb_y * 16 * wrap_y)              + mb_x * 16;
1793     ptr_cb = s->new_picture.f.data[1] +
1794              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1795     ptr_cr = s->new_picture.f.data[2] +
1796              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1797
1798     if((mb_x*16+16 > s->width || mb_y*16+16 > s->height) && s->codec_id != CODEC_ID_AMV){
1799         uint8_t *ebuf = s->edge_emu_buffer + 32;
1800         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1801                                 mb_y * 16, s->width, s->height);
1802         ptr_y = ebuf;
1803         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1804                                 mb_block_height, mb_x * 8, mb_y * 8,
1805                                 s->width >> 1, s->height >> 1);
1806         ptr_cb = ebuf + 18 * wrap_y;
1807         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1808                                 mb_block_height, mb_x * 8, mb_y * 8,
1809                                 s->width >> 1, s->height >> 1);
1810         ptr_cr = ebuf + 18 * wrap_y + 8;
1811     }
1812
1813     if (s->mb_intra) {
1814         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1815             int progressive_score, interlaced_score;
1816
1817             s->interlaced_dct = 0;
1818             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1819                                                     NULL, wrap_y, 8) +
1820                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1821                                                     NULL, wrap_y, 8) - 400;
1822
1823             if (progressive_score > 0) {
1824                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1825                                                        NULL, wrap_y * 2, 8) +
1826                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1827                                                        NULL, wrap_y * 2, 8);
1828                 if (progressive_score > interlaced_score) {
1829                     s->interlaced_dct = 1;
1830
1831                     dct_offset = wrap_y;
1832                     wrap_y <<= 1;
1833                     if (s->chroma_format == CHROMA_422)
1834                         wrap_c <<= 1;
1835                 }
1836             }
1837         }
1838
1839         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1840         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1841         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1842         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1843
1844         if (s->flags & CODEC_FLAG_GRAY) {
1845             skip_dct[4] = 1;
1846             skip_dct[5] = 1;
1847         } else {
1848             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1849             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1850             if (!s->chroma_y_shift) { /* 422 */
1851                 s->dsp.get_pixels(s->block[6],
1852                                   ptr_cb + (dct_offset >> 1), wrap_c);
1853                 s->dsp.get_pixels(s->block[7],
1854                                   ptr_cr + (dct_offset >> 1), wrap_c);
1855             }
1856         }
1857     } else {
1858         op_pixels_func (*op_pix)[4];
1859         qpel_mc_func (*op_qpix)[16];
1860         uint8_t *dest_y, *dest_cb, *dest_cr;
1861
1862         dest_y  = s->dest[0];
1863         dest_cb = s->dest[1];
1864         dest_cr = s->dest[2];
1865
1866         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1867             op_pix  = s->dsp.put_pixels_tab;
1868             op_qpix = s->dsp.put_qpel_pixels_tab;
1869         } else {
1870             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1871             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1872         }
1873
1874         if (s->mv_dir & MV_DIR_FORWARD) {
1875             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f.data,
1876                        op_pix, op_qpix);
1877             op_pix  = s->dsp.avg_pixels_tab;
1878             op_qpix = s->dsp.avg_qpel_pixels_tab;
1879         }
1880         if (s->mv_dir & MV_DIR_BACKWARD) {
1881             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f.data,
1882                        op_pix, op_qpix);
1883         }
1884
1885         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1886             int progressive_score, interlaced_score;
1887
1888             s->interlaced_dct = 0;
1889             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1890                                                     ptr_y,              wrap_y,
1891                                                     8) +
1892                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1893                                                     ptr_y + wrap_y * 8, wrap_y,
1894                                                     8) - 400;
1895
1896             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1897                 progressive_score -= 400;
1898
1899             if (progressive_score > 0) {
1900                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1901                                                        ptr_y,
1902                                                        wrap_y * 2, 8) +
1903                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1904                                                        ptr_y + wrap_y,
1905                                                        wrap_y * 2, 8);
1906
1907                 if (progressive_score > interlaced_score) {
1908                     s->interlaced_dct = 1;
1909
1910                     dct_offset = wrap_y;
1911                     wrap_y <<= 1;
1912                     if (s->chroma_format == CHROMA_422)
1913                         wrap_c <<= 1;
1914                 }
1915             }
1916         }
1917
1918         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1919         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1920         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1921                            dest_y + dct_offset, wrap_y);
1922         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1923                            dest_y + dct_offset + 8, wrap_y);
1924
1925         if (s->flags & CODEC_FLAG_GRAY) {
1926             skip_dct[4] = 1;
1927             skip_dct[5] = 1;
1928         } else {
1929             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1930             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1931             if (!s->chroma_y_shift) { /* 422 */
1932                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1933                                    dest_cb + (dct_offset >> 1), wrap_c);
1934                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1935                                    dest_cr + (dct_offset >> 1), wrap_c);
1936             }
1937         }
1938         /* pre quantization */
1939         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1940                 2 * s->qscale * s->qscale) {
1941             // FIXME optimize
1942             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1943                               wrap_y, 8) < 20 * s->qscale)
1944                 skip_dct[0] = 1;
1945             if (s->dsp.sad[1](NULL, ptr_y + 8,
1946                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1947                 skip_dct[1] = 1;
1948             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1949                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1950                 skip_dct[2] = 1;
1951             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1952                               dest_y + dct_offset + 8,
1953                               wrap_y, 8) < 20 * s->qscale)
1954                 skip_dct[3] = 1;
1955             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1956                               wrap_c, 8) < 20 * s->qscale)
1957                 skip_dct[4] = 1;
1958             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1959                               wrap_c, 8) < 20 * s->qscale)
1960                 skip_dct[5] = 1;
1961             if (!s->chroma_y_shift) { /* 422 */
1962                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1963                                   dest_cb + (dct_offset >> 1),
1964                                   wrap_c, 8) < 20 * s->qscale)
1965                     skip_dct[6] = 1;
1966                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1967                                   dest_cr + (dct_offset >> 1),
1968                                   wrap_c, 8) < 20 * s->qscale)
1969                     skip_dct[7] = 1;
1970             }
1971         }
1972     }
1973
1974     if (s->quantizer_noise_shaping) {
1975         if (!skip_dct[0])
1976             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1977         if (!skip_dct[1])
1978             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1979         if (!skip_dct[2])
1980             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1981         if (!skip_dct[3])
1982             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1983         if (!skip_dct[4])
1984             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1985         if (!skip_dct[5])
1986             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1987         if (!s->chroma_y_shift) { /* 422 */
1988             if (!skip_dct[6])
1989                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1990                                   wrap_c);
1991             if (!skip_dct[7])
1992                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1993                                   wrap_c);
1994         }
1995         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
1996     }
1997
1998     /* DCT & quantize */
1999     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2000     {
2001         for (i = 0; i < mb_block_count; i++) {
2002             if (!skip_dct[i]) {
2003                 int overflow;
2004                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2005                 // FIXME we could decide to change to quantizer instead of
2006                 // clipping
2007                 // JS: I don't think that would be a good idea it could lower
2008                 //     quality instead of improve it. Just INTRADC clipping
2009                 //     deserves changes in quantizer
2010                 if (overflow)
2011                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2012             } else
2013                 s->block_last_index[i] = -1;
2014         }
2015         if (s->quantizer_noise_shaping) {
2016             for (i = 0; i < mb_block_count; i++) {
2017                 if (!skip_dct[i]) {
2018                     s->block_last_index[i] =
2019                         dct_quantize_refine(s, s->block[i], weight[i],
2020                                             orig[i], i, s->qscale);
2021                 }
2022             }
2023         }
2024
2025         if (s->luma_elim_threshold && !s->mb_intra)
2026             for (i = 0; i < 4; i++)
2027                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2028         if (s->chroma_elim_threshold && !s->mb_intra)
2029             for (i = 4; i < mb_block_count; i++)
2030                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2031
2032         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2033             for (i = 0; i < mb_block_count; i++) {
2034                 if (s->block_last_index[i] == -1)
2035                     s->coded_score[i] = INT_MAX / 256;
2036             }
2037         }
2038     }
2039
2040     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2041         s->block_last_index[4] =
2042         s->block_last_index[5] = 0;
2043         s->block[4][0] =
2044         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2045     }
2046
2047     // non c quantize code returns incorrect block_last_index FIXME
2048     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2049         for (i = 0; i < mb_block_count; i++) {
2050             int j;
2051             if (s->block_last_index[i] > 0) {
2052                 for (j = 63; j > 0; j--) {
2053                     if (s->block[i][s->intra_scantable.permutated[j]])
2054                         break;
2055                 }
2056                 s->block_last_index[i] = j;
2057             }
2058         }
2059     }
2060
2061     /* huffman encode */
2062     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2063     case CODEC_ID_MPEG1VIDEO:
2064     case CODEC_ID_MPEG2VIDEO:
2065         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2066             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2067         break;
2068     case CODEC_ID_MPEG4:
2069         if (CONFIG_MPEG4_ENCODER)
2070             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2071         break;
2072     case CODEC_ID_MSMPEG4V2:
2073     case CODEC_ID_MSMPEG4V3:
2074     case CODEC_ID_WMV1:
2075         if (CONFIG_MSMPEG4_ENCODER)
2076             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2077         break;
2078     case CODEC_ID_WMV2:
2079         if (CONFIG_WMV2_ENCODER)
2080             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2081         break;
2082     case CODEC_ID_H261:
2083         if (CONFIG_H261_ENCODER)
2084             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2085         break;
2086     case CODEC_ID_H263:
2087     case CODEC_ID_H263P:
2088     case CODEC_ID_FLV1:
2089     case CODEC_ID_RV10:
2090     case CODEC_ID_RV20:
2091         if (CONFIG_H263_ENCODER)
2092             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2093         break;
2094     case CODEC_ID_MJPEG:
2095     case CODEC_ID_AMV:
2096         if (CONFIG_MJPEG_ENCODER)
2097             ff_mjpeg_encode_mb(s, s->block);
2098         break;
2099     default:
2100         assert(0);
2101     }
2102 }
2103
2104 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2105 {
2106     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2107     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2108 }
2109
2110 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2111     int i;
2112
2113     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2114
2115     /* mpeg1 */
2116     d->mb_skip_run= s->mb_skip_run;
2117     for(i=0; i<3; i++)
2118         d->last_dc[i] = s->last_dc[i];
2119
2120     /* statistics */
2121     d->mv_bits= s->mv_bits;
2122     d->i_tex_bits= s->i_tex_bits;
2123     d->p_tex_bits= s->p_tex_bits;
2124     d->i_count= s->i_count;
2125     d->f_count= s->f_count;
2126     d->b_count= s->b_count;
2127     d->skip_count= s->skip_count;
2128     d->misc_bits= s->misc_bits;
2129     d->last_bits= 0;
2130
2131     d->mb_skipped= 0;
2132     d->qscale= s->qscale;
2133     d->dquant= s->dquant;
2134
2135     d->esc3_level_length= s->esc3_level_length;
2136 }
2137
2138 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2139     int i;
2140
2141     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2142     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2143
2144     /* mpeg1 */
2145     d->mb_skip_run= s->mb_skip_run;
2146     for(i=0; i<3; i++)
2147         d->last_dc[i] = s->last_dc[i];
2148
2149     /* statistics */
2150     d->mv_bits= s->mv_bits;
2151     d->i_tex_bits= s->i_tex_bits;
2152     d->p_tex_bits= s->p_tex_bits;
2153     d->i_count= s->i_count;
2154     d->f_count= s->f_count;
2155     d->b_count= s->b_count;
2156     d->skip_count= s->skip_count;
2157     d->misc_bits= s->misc_bits;
2158
2159     d->mb_intra= s->mb_intra;
2160     d->mb_skipped= s->mb_skipped;
2161     d->mv_type= s->mv_type;
2162     d->mv_dir= s->mv_dir;
2163     d->pb= s->pb;
2164     if(s->data_partitioning){
2165         d->pb2= s->pb2;
2166         d->tex_pb= s->tex_pb;
2167     }
2168     d->block= s->block;
2169     for(i=0; i<8; i++)
2170         d->block_last_index[i]= s->block_last_index[i];
2171     d->interlaced_dct= s->interlaced_dct;
2172     d->qscale= s->qscale;
2173
2174     d->esc3_level_length= s->esc3_level_length;
2175 }
2176
2177 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2178                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2179                            int *dmin, int *next_block, int motion_x, int motion_y)
2180 {
2181     int score;
2182     uint8_t *dest_backup[3];
2183
2184     copy_context_before_encode(s, backup, type);
2185
2186     s->block= s->blocks[*next_block];
2187     s->pb= pb[*next_block];
2188     if(s->data_partitioning){
2189         s->pb2   = pb2   [*next_block];
2190         s->tex_pb= tex_pb[*next_block];
2191     }
2192
2193     if(*next_block){
2194         memcpy(dest_backup, s->dest, sizeof(s->dest));
2195         s->dest[0] = s->rd_scratchpad;
2196         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2197         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2198         assert(s->linesize >= 32); //FIXME
2199     }
2200
2201     encode_mb(s, motion_x, motion_y);
2202
2203     score= put_bits_count(&s->pb);
2204     if(s->data_partitioning){
2205         score+= put_bits_count(&s->pb2);
2206         score+= put_bits_count(&s->tex_pb);
2207     }
2208
2209     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2210         ff_MPV_decode_mb(s, s->block);
2211
2212         score *= s->lambda2;
2213         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2214     }
2215
2216     if(*next_block){
2217         memcpy(s->dest, dest_backup, sizeof(s->dest));
2218     }
2219
2220     if(score<*dmin){
2221         *dmin= score;
2222         *next_block^=1;
2223
2224         copy_context_after_encode(best, s, type);
2225     }
2226 }
2227
2228 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2229     uint32_t *sq = ff_squareTbl + 256;
2230     int acc=0;
2231     int x,y;
2232
2233     if(w==16 && h==16)
2234         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2235     else if(w==8 && h==8)
2236         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2237
2238     for(y=0; y<h; y++){
2239         for(x=0; x<w; x++){
2240             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2241         }
2242     }
2243
2244     assert(acc>=0);
2245
2246     return acc;
2247 }
2248
2249 static int sse_mb(MpegEncContext *s){
2250     int w= 16;
2251     int h= 16;
2252
2253     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2254     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2255
2256     if(w==16 && h==16)
2257       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2258         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2259                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2260                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2261       }else{
2262         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2263                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2264                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2265       }
2266     else
2267         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2268                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2269                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2270 }
2271
2272 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2273     MpegEncContext *s= *(void**)arg;
2274
2275
2276     s->me.pre_pass=1;
2277     s->me.dia_size= s->avctx->pre_dia_size;
2278     s->first_slice_line=1;
2279     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2280         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2281             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2282         }
2283         s->first_slice_line=0;
2284     }
2285
2286     s->me.pre_pass=0;
2287
2288     return 0;
2289 }
2290
2291 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2292     MpegEncContext *s= *(void**)arg;
2293
2294     ff_check_alignment();
2295
2296     s->me.dia_size= s->avctx->dia_size;
2297     s->first_slice_line=1;
2298     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2299         s->mb_x=0; //for block init below
2300         ff_init_block_index(s);
2301         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2302             s->block_index[0]+=2;
2303             s->block_index[1]+=2;
2304             s->block_index[2]+=2;
2305             s->block_index[3]+=2;
2306
2307             /* compute motion vector & mb_type and store in context */
2308             if(s->pict_type==AV_PICTURE_TYPE_B)
2309                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2310             else
2311                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2312         }
2313         s->first_slice_line=0;
2314     }
2315     return 0;
2316 }
2317
2318 static int mb_var_thread(AVCodecContext *c, void *arg){
2319     MpegEncContext *s= *(void**)arg;
2320     int mb_x, mb_y;
2321
2322     ff_check_alignment();
2323
2324     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2325         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2326             int xx = mb_x * 16;
2327             int yy = mb_y * 16;
2328             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2329             int varc;
2330             int sum = s->dsp.pix_sum(pix, s->linesize);
2331
2332             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2333
2334             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2335             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2336             s->me.mb_var_sum_temp    += varc;
2337         }
2338     }
2339     return 0;
2340 }
2341
2342 static void write_slice_end(MpegEncContext *s){
2343     if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4){
2344         if(s->partitioned_frame){
2345             ff_mpeg4_merge_partitions(s);
2346         }
2347
2348         ff_mpeg4_stuffing(&s->pb);
2349     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2350         ff_mjpeg_encode_stuffing(&s->pb);
2351     }
2352
2353     avpriv_align_put_bits(&s->pb);
2354     flush_put_bits(&s->pb);
2355
2356     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2357         s->misc_bits+= get_bits_diff(s);
2358 }
2359
/**
 * Fill the most recently reserved 12-byte macroblock-info record, i.e. the
 * last 12 bytes of the s->mb_info_size bytes starting at s->mb_info_ptr.
 *
 * Record layout as written below:
 *   le32 bit offset of the macroblock in s->pb, byte qscale, byte GOB number,
 *   le16 macroblock address inside the GOB, byte hmv1, byte vmv1,
 *   byte hmv2, byte vmv2 (the last two are always 0).
 */
2360 static void write_mb_info(MpegEncContext *s)
2361 {
2362     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2363     int offset = put_bits_count(&s->pb);
2364     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2365     int gobn = s->mb_y / s->gob_index;
         /* Zero-initialized so that defined values are written even when the
          * H.263 encoder is compiled out and the predictor call is skipped. */
2366     int pred_x = 0, pred_y = 0;
2367     if (CONFIG_H263_ENCODER)
2368         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2369     bytestream_put_le32(&ptr, offset);
2370     bytestream_put_byte(&ptr, s->qscale);
2371     bytestream_put_byte(&ptr, gobn);
2372     bytestream_put_le16(&ptr, mba);
2373     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2374     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2375     /* 4MV not implemented */
2376     bytestream_put_byte(&ptr, 0); /* hmv2 */
2377     bytestream_put_byte(&ptr, 0); /* vmv2 */
2378 }
2379
/**
 * Keep the macroblock-info records in step with the bitstream position.
 *
 * Does nothing when s->mb_info is 0.  Once at least s->mb_info bytes have
 * been written since s->prev_mb_info, a further 12-byte record is reserved.
 *
 * @param startcode non-zero when called right before a start code / resync
 *                  header is written: only the byte position is remembered
 *                  and no record is written.  With 0 the current record is
 *                  (re)written through write_mb_info().
 */
2380 static void update_mb_info(MpegEncContext *s, int startcode)
2381 {
2382     int bit_pos;
2383
2384     if (!s->mb_info)
2385         return;
2386
2387     bit_pos = put_bits_count(&s->pb);
2388     if (bit_pos - s->prev_mb_info * 8 >= s->mb_info * 8) {
2389         s->mb_info_size += 12;
2390         s->prev_mb_info  = s->last_mb_info;
2391     }
2392
2393     if (!startcode) {
2394         s->last_mb_info = bit_pos / 8;
2395         if (s->mb_info_size == 0)
2396             s->mb_info_size = 12;
2397         write_mb_info(s);
2398     } else {
             /* The size check above may already have reserved a new record
              * that is left unwritten here.  That is fine: this function is
              * called again, with startcode == 0, once the start code has
              * been written, and that call fills the record in. */
2399         s->prev_mb_info = bit_pos / 8;
2400     }
2401 }
2402
/**
 * Slice worker: encode the macroblock rows [s->start_mb_y, s->end_mb_y) of
 * the current picture into s->pb.
 *
 * For every macroblock this optionally starts a new resync packet (H.263 GOB,
 * MPEG-4 video packet or MPEG-1/2 slice) when s->rtp_mode is set.  Then, if
 * more than one candidate macroblock type is possible or QP_RD is enabled,
 * each candidate is tried through encode_mb_hq() and the retained one
 * (best_s / dmin) is committed; otherwise the single possible type is encoded
 * directly with encode_mb().  After the last row the slice is terminated with
 * write_slice_end() and the final packet is handed to the RTP callback.
 *
 * @param c   not used by this function
 * @param arg points to a pointer to the MpegEncContext of this slice
 * @return 0 on success, -1 when fewer than MAX_MB_BYTES are left in the
 *         output buffer (or, with data partitioning, in a partition buffer)
 */
2403 static int encode_thread(AVCodecContext *c, void *arg){
2404     MpegEncContext *s= *(void**)arg;
2405     int mb_x, mb_y, pdif = 0;
2406     int chr_h= 16>>s->chroma_y_shift;
2407     int i, j;
2408     MpegEncContext best_s, backup_s;
2409     uint8_t bit_buf[2][MAX_MB_BYTES];
2410     uint8_t bit_buf2[2][MAX_MB_BYTES];
2411     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2412     PutBitContext pb[2], pb2[2], tex_pb[2];
2413 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2414
2415     ff_check_alignment();
2416
         /* Two scratch bit writers per bitstream; both are handed to
          * encode_mb_hq() and the committed candidate is later copied back
          * from index next_block^1. */
2417     for(i=0; i<2; i++){
2418         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2419         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2420         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2421     }
2422
         /* reset the per-slice statistics */
2423     s->last_bits= put_bits_count(&s->pb);
2424     s->mv_bits=0;
2425     s->misc_bits=0;
2426     s->i_tex_bits=0;
2427     s->p_tex_bits=0;
2428     s->i_count=0;
2429     s->f_count=0;
2430     s->b_count=0;
2431     s->skip_count=0;
2432
2433     for(i=0; i<3; i++){
2434         /* init last dc values */
2435         /* note: quant matrix value (8) is implied here */
2436         s->last_dc[i] = 128 << s->intra_dc_precision;
2437
2438         s->current_picture.f.error[i] = 0;
2439     }
         /* AMV starts from different DC predictor values */
2440     if(s->codec_id==CODEC_ID_AMV){
2441         s->last_dc[0] = 128*8/13;
2442         s->last_dc[1] = 128*8/14;
2443         s->last_dc[2] = 128*8/14;
2444     }
2445     s->mb_skip_run = 0;
2446     memset(s->last_mv, 0, sizeof(s->last_mv));
2447
2448     s->last_mv_dir = 0;
2449
2450     switch(s->codec_id){
2451     case CODEC_ID_H263:
2452     case CODEC_ID_H263P:
2453     case CODEC_ID_FLV1:
2454         if (CONFIG_H263_ENCODER)
2455             s->gob_index = ff_h263_get_gob_height(s);
2456         break;
2457     case CODEC_ID_MPEG4:
2458         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2459             ff_mpeg4_init_partitions(s);
2460         break;
2461     }
2462
2463     s->resync_mb_x=0;
2464     s->resync_mb_y=0;
2465     s->first_slice_line = 1;
2466     s->ptr_lastgob = s->pb.buf;
2467     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2468 //    printf("row %d at %X\n", s->mb_y, (int)s);
2469         s->mb_x=0;
2470         s->mb_y= mb_y;
2471
2472         ff_set_qscale(s, s->qscale);
2473         ff_init_block_index(s);
2474
2475         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2476             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2477             int mb_type= s->mb_type[xy];
2478 //            int d;
2479             int dmin= INT_MAX;
2480             int dir;
2481
                 /* give up if fewer than MAX_MB_BYTES remain in the output buffer */
2482             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2483                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2484                 return -1;
2485             }
2486             if(s->data_partitioning){
2487                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2488                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2489                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2490                     return -1;
2491                 }
2492             }
2493
2494             s->mb_x = mb_x;
2495             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2496             ff_update_block_index(s);
2497
2498             if(CONFIG_H261_ENCODER && s->codec_id == CODEC_ID_H261){
2499                 ff_h261_reorder_mb_index(s);
2500                 xy= s->mb_y*s->mb_stride + s->mb_x;
2501                 mb_type= s->mb_type[xy];
2502             }
2503
2504             /* write gob / video packet header  */
2505             if(s->rtp_mode){
2506                 int current_packet_size, is_gob_start;
2507
2508                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2509
                     /* start a new packet once the current one has reached
                      * rtp_payload_size, but never at the very first macroblock */
2510                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2511
                     /* a slice that does not begin at row 0 always starts with a header */
2512                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2513
2514                 switch(s->codec_id){
2515                 case CODEC_ID_H263:
2516                 case CODEC_ID_H263P:
                         /* without slice structured mode a GOB may only start at
                          * the first macroblock of a row that is a multiple of gob_index */
2517                     if(!s->h263_slice_structured)
2518                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2519                     break;
2520                 case CODEC_ID_MPEG2VIDEO:
2521                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
                         /* fall through */
2522                 case CODEC_ID_MPEG1VIDEO:
2523                     if(s->mb_skip_run) is_gob_start=0;
2524                     break;
2525                 }
2526
2527                 if(is_gob_start){
2528                     if(s->start_mb_y != mb_y || mb_x!=0){
2529                         write_slice_end(s);
2530
2531                         if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
2532                             ff_mpeg4_init_partitions(s);
2533                         }
2534                     }
2535
2536                     assert((put_bits_count(&s->pb)&7) == 0);
2537                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2538
                         /* Error simulation: discard the packet that was just
                          * finished by rewinding the write pointer to its start. */
2539                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2540                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2541                         int d= 100 / s->avctx->error_rate;
                             /* d is 0 when |error_rate| > 100; treat that as
                              * "drop every packet" instead of dividing by zero. */
2542                         if(d == 0 || r % d == 0){
2543                             current_packet_size=0;
2544                             s->pb.buf_ptr= s->ptr_lastgob;
2545                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2546                         }
2547                     }
2548
2549                     if (s->avctx->rtp_callback){
2550                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2551                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2552                     }
2553                     update_mb_info(s, 1);
2554
2555                     switch(s->codec_id){
2556                     case CODEC_ID_MPEG4:
2557                         if (CONFIG_MPEG4_ENCODER) {
2558                             ff_mpeg4_encode_video_packet_header(s);
2559                             ff_mpeg4_clean_buffers(s);
2560                         }
2561                     break;
2562                     case CODEC_ID_MPEG1VIDEO:
2563                     case CODEC_ID_MPEG2VIDEO:
2564                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2565                             ff_mpeg1_encode_slice_header(s);
2566                             ff_mpeg1_clean_buffers(s);
2567                         }
2568                     break;
2569                     case CODEC_ID_H263:
2570                     case CODEC_ID_H263P:
2571                         if (CONFIG_H263_ENCODER)
2572                             ff_h263_encode_gob_header(s, mb_y);
2573                     break;
2574                     }
2575
2576                     if(s->flags&CODEC_FLAG_PASS1){
2577                         int bits= put_bits_count(&s->pb);
2578                         s->misc_bits+= bits - s->last_bits;
2579                         s->last_bits= bits;
2580                     }
2581
2582                     s->ptr_lastgob += current_packet_size;
2583                     s->first_slice_line=1;
2584                     s->resync_mb_x=mb_x;
2585                     s->resync_mb_y=mb_y;
2586                 }
2587             }
2588
2589             if(  (s->resync_mb_x   == s->mb_x)
2590                && s->resync_mb_y+1 == s->mb_y){
2591                 s->first_slice_line=0;
2592             }
2593
2594             s->mb_skipped=0;
2595             s->dquant=0; //only for QP_RD
2596
2597             update_mb_info(s, 0);
2598
2599             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2600                 int next_block=0;
2601                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2602
                     /* remember the state before this macroblock; every
                      * candidate below is passed this backup */
2603                 copy_context_before_encode(&backup_s, s, -1);
2604                 backup_s.pb= s->pb;
2605                 best_s.data_partitioning= s->data_partitioning;
2606                 best_s.partitioned_frame= s->partitioned_frame;
2607                 if(s->data_partitioning){
2608                     backup_s.pb2= s->pb2;
2609                     backup_s.tex_pb= s->tex_pb;
2610                 }
2611
2612                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2613                     s->mv_dir = MV_DIR_FORWARD;
2614                     s->mv_type = MV_TYPE_16X16;
2615                     s->mb_intra= 0;
2616                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2617                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2618                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2619                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2620                 }
2621                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2622                     s->mv_dir = MV_DIR_FORWARD;
2623                     s->mv_type = MV_TYPE_FIELD;
2624                     s->mb_intra= 0;
2625                     for(i=0; i<2; i++){
2626                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2627                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2628                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2629                     }
2630                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2631                                  &dmin, &next_block, 0, 0);
2632                 }
2633                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2634                     s->mv_dir = MV_DIR_FORWARD;
2635                     s->mv_type = MV_TYPE_16X16;
2636                     s->mb_intra= 0;
2637                     s->mv[0][0][0] = 0;
2638                     s->mv[0][0][1] = 0;
2639                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2640                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2641                 }
2642                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2643                     s->mv_dir = MV_DIR_FORWARD;
2644                     s->mv_type = MV_TYPE_8X8;
2645                     s->mb_intra= 0;
2646                     for(i=0; i<4; i++){
2647                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2648                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2649                     }
2650                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2651                                  &dmin, &next_block, 0, 0);
2652                 }
2653                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2654                     s->mv_dir = MV_DIR_FORWARD;
2655                     s->mv_type = MV_TYPE_16X16;
2656                     s->mb_intra= 0;
2657                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2658                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2659                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2660                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2661                 }
2662                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2663                     s->mv_dir = MV_DIR_BACKWARD;
2664                     s->mv_type = MV_TYPE_16X16;
2665                     s->mb_intra= 0;
2666                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2667                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2668                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2669                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2670                 }
2671                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2672                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2673                     s->mv_type = MV_TYPE_16X16;
2674                     s->mb_intra= 0;
2675                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2676                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2677                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2678                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2679                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2680                                  &dmin, &next_block, 0, 0);
2681                 }
2682                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2683                     s->mv_dir = MV_DIR_FORWARD;
2684                     s->mv_type = MV_TYPE_FIELD;
2685                     s->mb_intra= 0;
2686                     for(i=0; i<2; i++){
2687                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2688                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2689                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2690                     }
2691                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2692                                  &dmin, &next_block, 0, 0);
2693                 }
2694                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2695                     s->mv_dir = MV_DIR_BACKWARD;
2696                     s->mv_type = MV_TYPE_FIELD;
2697                     s->mb_intra= 0;
2698                     for(i=0; i<2; i++){
2699                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2700                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2701                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2702                     }
2703                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2704                                  &dmin, &next_block, 0, 0);
2705                 }
2706                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2707                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2708                     s->mv_type = MV_TYPE_FIELD;
2709                     s->mb_intra= 0;
2710                     for(dir=0; dir<2; dir++){
2711                         for(i=0; i<2; i++){
2712                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2713                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2714                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2715                         }
2716                     }
2717                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2718                                  &dmin, &next_block, 0, 0);
2719                 }
2720                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2721                     s->mv_dir = 0;
2722                     s->mv_type = MV_TYPE_16X16;
2723                     s->mb_intra= 1;
2724                     s->mv[0][0][0] = 0;
2725                     s->mv[0][0][1] = 0;
2726                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2727                                  &dmin, &next_block, 0, 0);
2728                     if(s->h263_pred || s->h263_aic){
2729                         if(best_s.mb_intra)
2730                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2731                         else
2732                             ff_clean_intra_table_entries(s); //old mode?
2733                     }
2734                 }
2735
                     /* QP_RD: retry the best 16x16 mode with the quantizer
                      * changed by each dquant_tab entry (only -2/+2 in B-frames) */
2736                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2737                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2738                         const int last_qp= backup_s.qscale;
2739                         int qpi, qp, dc[6];
2740                         DCTELEM ac[6][16];
2741                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2742                         static const int dquant_tab[4]={-1,1,-2,2};
2743
2744                         assert(backup_s.dquant == 0);
2745
2746                         //FIXME intra
2747                         s->mv_dir= best_s.mv_dir;
2748                         s->mv_type = MV_TYPE_16X16;
2749                         s->mb_intra= best_s.mb_intra;
2750                         s->mv[0][0][0] = best_s.mv[0][0][0];
2751                         s->mv[0][0][1] = best_s.mv[0][0][1];
2752                         s->mv[1][0][0] = best_s.mv[1][0][0];
2753                         s->mv[1][0][1] = best_s.mv[1][0][1];
2754
2755                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2756                         for(; qpi<4; qpi++){
2757                             int dquant= dquant_tab[qpi];
2758                             qp= last_qp + dquant;
2759                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2760                                 continue;
2761                             backup_s.dquant= dquant;
                                 /* save the DC/AC prediction values so they can be
                                  * restored below if this qp is not retained */
2762                             if(s->mb_intra && s->dc_val[0]){
2763                                 for(i=0; i<6; i++){
2764                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2765                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2766                                 }
2767                             }
2768
2769                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2770                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2771                             if(best_s.qscale != qp){
2772                                 if(s->mb_intra && s->dc_val[0]){
2773                                     for(i=0; i<6; i++){
2774                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2775                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2776                                     }
2777                                 }
2778                             }
2779                         }
2780                     }
2781                 }
2782                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2783                     int mx= s->b_direct_mv_table[xy][0];
2784                     int my= s->b_direct_mv_table[xy][1];
2785
2786                     backup_s.dquant = 0;
2787                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2788                     s->mb_intra= 0;
2789                     ff_mpeg4_set_direct_mv(s, mx, my);
2790                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2791                                  &dmin, &next_block, mx, my);
2792                 }
2793                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2794                     backup_s.dquant = 0;
2795                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2796                     s->mb_intra= 0;
2797                     ff_mpeg4_set_direct_mv(s, 0, 0);
2798                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2799                                  &dmin, &next_block, 0, 0);
2800                 }
                     /* SKIP_RD: if the best mode is not intra and any
                      * block_last_index is non-zero, also try it with skipdct set */
2801                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2802                     int coded=0;
2803                     for(i=0; i<6; i++)
2804                         coded |= s->block_last_index[i];
2805                     if(coded){
2806                         int mx,my;
2807                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2808                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2809                             mx=my=0; //FIXME find the one we actually used
2810                             ff_mpeg4_set_direct_mv(s, mx, my);
2811                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2812                             mx= s->mv[1][0][0];
2813                             my= s->mv[1][0][1];
2814                         }else{
2815                             mx= s->mv[0][0][0];
2816                             my= s->mv[0][0][1];
2817                         }
2818
2819                         s->mv_dir= best_s.mv_dir;
2820                         s->mv_type = best_s.mv_type;
2821                         s->mb_intra= 0;
2822 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2823                         s->mv[0][0][1] = best_s.mv[0][0][1];
2824                         s->mv[1][0][0] = best_s.mv[1][0][0];
2825                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2826                         backup_s.dquant= 0;
2827                         s->skipdct=1;
2828                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2829                                         &dmin, &next_block, mx, my);
2830                         s->skipdct=0;
2831                     }
2832                 }
2833
2834                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2835
                     /* commit the retained candidate: take over its context and
                      * append its bits from the scratch buffer(s) to the real
                      * bitstream(s), continuing from the saved write position */
2836                 copy_context_after_encode(s, &best_s, -1);
2837
2838                 pb_bits_count= put_bits_count(&s->pb);
2839                 flush_put_bits(&s->pb);
2840                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2841                 s->pb= backup_s.pb;
2842
2843                 if(s->data_partitioning){
2844                     pb2_bits_count= put_bits_count(&s->pb2);
2845                     flush_put_bits(&s->pb2);
2846                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2847                     s->pb2= backup_s.pb2;
2848
2849                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2850                     flush_put_bits(&s->tex_pb);
2851                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2852                     s->tex_pb= backup_s.tex_pb;
2853                 }
2854                 s->last_bits= put_bits_count(&s->pb);
2855
2856                 if (CONFIG_H263_ENCODER &&
2857                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2858                     ff_h263_update_motion_val(s);
2859
                     /* copy the pixels held in rd_scratchpad (luma, then the two
                      * chroma planes) into the destination picture */
2860                 if(next_block==0){ //FIXME 16 vs linesize16
2861                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2862                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2863                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2864                 }
2865
2866                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2867                     ff_MPV_decode_mb(s, s->block);
2868             } else {
2869                 int motion_x = 0, motion_y = 0;
2870                 s->mv_type=MV_TYPE_16X16;
2871                 // only one MB-Type possible
2872
2873                 switch(mb_type){
2874                 case CANDIDATE_MB_TYPE_INTRA:
2875                     s->mv_dir = 0;
2876                     s->mb_intra= 1;
2877                     motion_x= s->mv[0][0][0] = 0;
2878                     motion_y= s->mv[0][0][1] = 0;
2879                     break;
2880                 case CANDIDATE_MB_TYPE_INTER:
2881                     s->mv_dir = MV_DIR_FORWARD;
2882                     s->mb_intra= 0;
2883                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2884                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2885                     break;
2886                 case CANDIDATE_MB_TYPE_INTER_I:
2887                     s->mv_dir = MV_DIR_FORWARD;
2888                     s->mv_type = MV_TYPE_FIELD;
2889                     s->mb_intra= 0;
2890                     for(i=0; i<2; i++){
2891                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2892                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2893                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2894                     }
2895                     break;
2896                 case CANDIDATE_MB_TYPE_INTER4V:
2897                     s->mv_dir = MV_DIR_FORWARD;
2898                     s->mv_type = MV_TYPE_8X8;
2899                     s->mb_intra= 0;
2900                     for(i=0; i<4; i++){
2901                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2902                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2903                     }
2904                     break;
2905                 case CANDIDATE_MB_TYPE_DIRECT:
2906                     if (CONFIG_MPEG4_ENCODER) {
2907                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2908                         s->mb_intra= 0;
2909                         motion_x=s->b_direct_mv_table[xy][0];
2910                         motion_y=s->b_direct_mv_table[xy][1];
2911                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2912                     }
2913                     break;
2914                 case CANDIDATE_MB_TYPE_DIRECT0:
2915                     if (CONFIG_MPEG4_ENCODER) {
2916                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2917                         s->mb_intra= 0;
2918                         ff_mpeg4_set_direct_mv(s, 0, 0);
2919                     }
2920                     break;
2921                 case CANDIDATE_MB_TYPE_BIDIR:
2922                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2923                     s->mb_intra= 0;
2924                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2925                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2926                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2927                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2928                     break;
2929                 case CANDIDATE_MB_TYPE_BACKWARD:
2930                     s->mv_dir = MV_DIR_BACKWARD;
2931                     s->mb_intra= 0;
2932                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2933                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2934                     break;
2935                 case CANDIDATE_MB_TYPE_FORWARD:
2936                     s->mv_dir = MV_DIR_FORWARD;
2937                     s->mb_intra= 0;
2938                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2939                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2940 //                    printf(" %d %d ", motion_x, motion_y);
2941                     break;
2942                 case CANDIDATE_MB_TYPE_FORWARD_I:
2943                     s->mv_dir = MV_DIR_FORWARD;
2944                     s->mv_type = MV_TYPE_FIELD;
2945                     s->mb_intra= 0;
2946                     for(i=0; i<2; i++){
2947                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2948                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2949                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2950                     }
2951                     break;
2952                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2953                     s->mv_dir = MV_DIR_BACKWARD;
2954                     s->mv_type = MV_TYPE_FIELD;
2955                     s->mb_intra= 0;
2956                     for(i=0; i<2; i++){
2957                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2958                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2959                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2960                     }
2961                     break;
2962                 case CANDIDATE_MB_TYPE_BIDIR_I:
2963                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2964                     s->mv_type = MV_TYPE_FIELD;
2965                     s->mb_intra= 0;
2966                     for(dir=0; dir<2; dir++){
2967                         for(i=0; i<2; i++){
2968                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2969                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2970                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2971                         }
2972                     }
2973                     break;
2974                 default:
2975                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2976                 }
2977
2978                 encode_mb(s, motion_x, motion_y);
2979
2980                 // RAL: Update last macroblock type
2981                 s->last_mv_dir = s->mv_dir;
2982
2983                 if (CONFIG_H263_ENCODER &&
2984                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2985                     ff_h263_update_motion_val(s);
2986
2987                 ff_MPV_decode_mb(s, s->block);
2988             }
2989
2990             /* clean the MV table in IPS frames for direct mode in B frames */
2991             if(s->mb_intra /* && I,P,S_TYPE */){
2992                 s->p_mv_table[xy][0]=0;
2993                 s->p_mv_table[xy][1]=0;
2994             }
2995
                 /* PSNR: accumulate the per-plane error between the source and
                  * s->dest; w/h are clipped at the right/bottom picture edge */
2996             if(s->flags&CODEC_FLAG_PSNR){
2997                 int w= 16;
2998                 int h= 16;
2999
3000                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3001                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3002
3003                 s->current_picture.f.error[0] += sse(
3004                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3005                     s->dest[0], w, h, s->linesize);
3006                 s->current_picture.f.error[1] += sse(
3007                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3008                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3009                 s->current_picture.f.error[2] += sse(
3010                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3011                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3012             }
3013             if(s->loop_filter){
3014                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3015                     ff_h263_loop_filter(s);
3016             }
3017 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
3018         }
3019     }
3020
3021     //not beautiful here but we must write it before flushing so it has to be here
3022     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3023         ff_msmpeg4_encode_ext_header(s);
3024
3025     write_slice_end(s);
3026
3027     /* Send the last GOB if RTP */
3028     if (s->avctx->rtp_callback) {
3029         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3030         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3031         /* Call the RTP callback to send the last GOB */
3032         emms_c();
3033         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3034     }
3035
3036     return 0;
3037 }
3038
3039 #define MERGE(field) dst->field += src->field; src->field=0
3040 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3041     MERGE(me.scene_change_score);
3042     MERGE(me.mc_mb_var_sum_temp);
3043     MERGE(me.mb_var_sum_temp);
3044 }
3045
3046 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3047     int i;
3048
3049     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3050     MERGE(dct_count[1]);
3051     MERGE(mv_bits);
3052     MERGE(i_tex_bits);
3053     MERGE(p_tex_bits);
3054     MERGE(i_count);
3055     MERGE(f_count);
3056     MERGE(b_count);
3057     MERGE(skip_count);
3058     MERGE(misc_bits);
3059     MERGE(error_count);
3060     MERGE(padding_bug_score);
3061     MERGE(current_picture.f.error[0]);
3062     MERGE(current_picture.f.error[1]);
3063     MERGE(current_picture.f.error[2]);
3064
3065     if(dst->avctx->noise_reduction){
3066         for(i=0; i<64; i++){
3067             MERGE(dct_error_sum[0][i]);
3068             MERGE(dct_error_sum[1][i]);
3069         }
3070     }
3071
3072     assert(put_bits_count(&src->pb) % 8 ==0);
3073     assert(put_bits_count(&dst->pb) % 8 ==0);
3074     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3075     flush_put_bits(&dst->pb);
3076 }
3077
3078 static int estimate_qp(MpegEncContext *s, int dry_run){
3079     if (s->next_lambda){
3080         s->current_picture_ptr->f.quality =
3081         s->current_picture.f.quality = s->next_lambda;
3082         if(!dry_run) s->next_lambda= 0;
3083     } else if (!s->fixed_qscale) {
3084         s->current_picture_ptr->f.quality =
3085         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3086         if (s->current_picture.f.quality < 0)
3087             return -1;
3088     }
3089
3090     if(s->adaptive_quant){
3091         switch(s->codec_id){
3092         case CODEC_ID_MPEG4:
3093             if (CONFIG_MPEG4_ENCODER)
3094                 ff_clean_mpeg4_qscales(s);
3095             break;
3096         case CODEC_ID_H263:
3097         case CODEC_ID_H263P:
3098         case CODEC_ID_FLV1:
3099             if (CONFIG_H263_ENCODER)
3100                 ff_clean_h263_qscales(s);
3101             break;
3102         default:
3103             ff_init_qscale_tab(s);
3104         }
3105
3106         s->lambda= s->lambda_table[0];
3107         //FIXME broken
3108     }else
3109         s->lambda = s->current_picture.f.quality;
3110 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3111     update_qscale(s);
3112     return 0;
3113 }
3114
3115 /* must be called before writing the header */
3116 static void set_frame_distances(MpegEncContext * s){
3117     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3118     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3119
3120     if(s->pict_type==AV_PICTURE_TYPE_B){
3121         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3122         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3123     }else{
3124         s->pp_time= s->time - s->last_non_b_time;
3125         s->last_non_b_time= s->time;
3126         assert(s->picture_number==0 || s->pp_time > 0);
3127     }
3128 }
3129
/**
 * Encode one picture.
 *
 * In order, this function: initialises per-picture state (timing, rounding,
 * lambda), runs the motion-estimation pass (or, for I pictures, the variance
 * pass) over all slice contexts, may promote a P picture to I on a scene
 * change, chooses f_code/b_code and fixes over-long motion vectors,
 * estimates the quantiser, writes the format-specific picture header and
 * finally runs encode_thread over all slice contexts and merges their
 * results back into s.
 *
 * @param s              main encoder context
 * @param picture_number stored in s->picture_number and handed to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails; the
 *         return values of the avctx->execute() calls are not checked
 */
3130 static int encode_picture(MpegEncContext *s, int picture_number)
3131 {
3132     int i;
3133     int bits;
3134     int context_count = s->slice_context_count;
3135
3136     s->picture_number = picture_number;
3137
3138     /* Reset the average MB variance */
3139     s->me.mb_var_sum_temp    =
3140     s->me.mc_mb_var_sum_temp = 0;
3141
3142     /* we need to initialize some time vars before we can encode b-frames */
3143     // RAL: Condition added for MPEG1VIDEO
3144     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3145         set_frame_distances(s);
3146     if(CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4)
3147         ff_set_mpeg4_time(s);
3148
3149     s->me.scene_change_score=0;
3150
3151 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3152
         /* I pictures set no_rounding to a fixed value; other non-B pictures
          * toggle it when flipflop rounding applies; B pictures leave it alone */
3153     if(s->pict_type==AV_PICTURE_TYPE_I){
3154         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3155         else                        s->no_rounding=0;
3156     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3157         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
3158             s->no_rounding ^= 1;
3159     }
3160
         /* preliminary lambda for motion estimation: a dry-run qp estimate in
          * the second pass, otherwise (unless qscale is fixed by flag) the
          * lambda remembered for the matching picture type */
3161     if(s->flags & CODEC_FLAG_PASS2){
3162         if (estimate_qp(s,1) < 0)
3163             return -1;
3164         ff_get_2pass_fcode(s);
3165     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3166         if(s->pict_type==AV_PICTURE_TYPE_B)
3167             s->lambda= s->last_lambda_for[s->pict_type];
3168         else
3169             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3170         update_qscale(s);
3171     }
3172
         /* every codec except AMV shares the luma intra matrices for chroma;
          * chroma matrices that are distinct allocations are freed first */
3173     if(s->codec_id != CODEC_ID_AMV){
3174         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3175         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3176         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3177         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3178     }
3179
3180     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* thread_context[0] is skipped here, as in the other per-context loops */
3181     for(i=1; i<context_count; i++){
3182         ff_update_duplicate_context(s->thread_context[i], s);
3183     }
3184
3185     if(ff_init_me(s)<0)
3186         return -1;
3187
3188     /* Estimate motion for every MB */
3189     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3190         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3191         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3192         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3193             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3194                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3195             }
3196         }
3197
3198         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3199     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3200         /* I-Frame */
3201         for(i=0; i<s->mb_stride*s->mb_height; i++)
3202             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3203
3204         if(!s->fixed_qscale){
3205             /* finding spatial complexity for I-frame rate control */
3206             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3207         }
3208     }
         /* collect the per-context ME statistics into the main context */
3209     for(i=1; i<context_count; i++){
3210         merge_context_after_me(s, s->thread_context[i]);
3211     }
3212     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3213     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3214     emms_c();
3215
         /* scene change: a P picture whose score exceeds the threshold is
          * coded as an I picture with every MB marked as an intra candidate */
3216     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3217         s->pict_type= AV_PICTURE_TYPE_I;
3218         for(i=0; i<s->mb_stride*s->mb_height; i++)
3219             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3220 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3221     }
3222
         /* choose f_code/b_code from the estimated vectors and let
          * ff_fix_long_mvs() deal with vectors that do not fit; skipped
          * entirely when umvplus is set */
3223     if(!s->umvplus){
3224         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3225             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3226
3227             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3228                 int a,b;
3229                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3230                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3231                 s->f_code= FFMAX3(s->f_code, a, b);
3232             }
3233
3234             ff_fix_long_p_mvs(s);
3235             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3236             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3237                 int j;
3238                 for(i=0; i<2; i++){
3239                     for(j=0; j<2; j++)
3240                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3241                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3242                 }
3243             }
3244         }
3245
3246         if(s->pict_type==AV_PICTURE_TYPE_B){
3247             int a, b;
3248
                 /* f_code covers the forward tables, b_code the backward ones */
3249             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3250             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3251             s->f_code = FFMAX(a, b);
3252
3253             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3254             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3255             s->b_code = FFMAX(a, b);
3256
3257             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3258             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3259             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3260             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3261             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3262                 int dir, j;
3263                 for(dir=0; dir<2; dir++){
3264                     for(i=0; i<2; i++){
3265                         for(j=0; j<2; j++){
3266                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3267                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3268                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3269                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3270                         }
3271                     }
3272                 }
3273             }
3274         }
3275     }
3276
         /* final (non dry-run) quantiser decision for this picture */
3277     if (estimate_qp(s, 0) < 0)
3278         return -1;
3279
3280     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3281         s->qscale= 3; //reduce clipping problems
3282
3283     if (s->out_format == FMT_MJPEG) {
3284         /* for mjpeg, we do include qscale in the matrix */
3285         for(i=1;i<64;i++){
3286             int j= s->dsp.idct_permutation[i];
3287
3288             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3289         }
3290         s->y_dc_scale_table=
3291         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3292         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3293         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3294                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrix, so it is pinned to 8 */
3295         s->qscale= 8;
3296     }
3297     if(s->codec_id == CODEC_ID_AMV){
             /* AMV: constant DC scale tables (13 luma, 14 chroma) and the
              * matrices taken from sp5x_quant_table rows 5*2+0 and 5*2+1 */
3298         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3299         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3300         for(i=1;i<64;i++){
3301             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3302
3303             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3304             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3305         }
3306         s->y_dc_scale_table= y;
3307         s->c_dc_scale_table= c;
3308         s->intra_matrix[0] = 13;
3309         s->chroma_intra_matrix[0] = 14;
3310         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3311                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3312         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3313                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3314         s->qscale= 8;
3315     }
3316
3317     //FIXME var duplication
3318     s->current_picture_ptr->f.key_frame =
3319     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3320     s->current_picture_ptr->f.pict_type =
3321     s->current_picture.f.pict_type = s->pict_type;
3322
3323     if (s->current_picture.f.key_frame)
3324         s->picture_in_gop_number=0;
3325
         /* write the picture header; its size in bits ends up in header_bits */
3326     s->last_bits= put_bits_count(&s->pb);
3327     switch(s->out_format) {
3328     case FMT_MJPEG:
3329         if (CONFIG_MJPEG_ENCODER)
3330             ff_mjpeg_encode_picture_header(s);
3331         break;
3332     case FMT_H261:
3333         if (CONFIG_H261_ENCODER)
3334             ff_h261_encode_picture_header(s, picture_number);
3335         break;
3336     case FMT_H263:
             /* several codecs share FMT_H263; the first matching branch wins */
3337         if (CONFIG_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
3338             ff_wmv2_encode_picture_header(s, picture_number);
3339         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3340             ff_msmpeg4_encode_picture_header(s, picture_number);
3341         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3342             ff_mpeg4_encode_picture_header(s, picture_number);
3343         else if (CONFIG_RV10_ENCODER && s->codec_id == CODEC_ID_RV10)
3344             ff_rv10_encode_picture_header(s, picture_number);
3345         else if (CONFIG_RV20_ENCODER && s->codec_id == CODEC_ID_RV20)
3346             ff_rv20_encode_picture_header(s, picture_number);
3347         else if (CONFIG_FLV_ENCODER && s->codec_id == CODEC_ID_FLV1)
3348             ff_flv_encode_picture_header(s, picture_number);
3349         else if (CONFIG_H263_ENCODER)
3350             ff_h263_encode_picture_header(s, picture_number);
3351         break;
3352     case FMT_MPEG1:
3353         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3354             ff_mpeg1_encode_picture_header(s, picture_number);
3355         break;
3356     case FMT_H264:
3357         break;
3358     default:
3359         assert(0);
3360     }
3361     bits= put_bits_count(&s->pb);
3362     s->header_bits= bits - s->last_bits;
3363
         /* refresh the other slice contexts, encode all slices, then merge the
          * statistics and bitstreams of contexts 1..n-1 into the main context */
3364     for(i=1; i<context_count; i++){
3365         update_duplicate_context_after_me(s->thread_context[i], s);
3366     }
3367     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3368     for(i=1; i<context_count; i++){
3369         merge_context_after_encode(s, s->thread_context[i]);
3370     }
3371     emms_c();
3372     return 0;
3373 }
3374
3375 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3376     const int intra= s->mb_intra;
3377     int i;
3378
3379     s->dct_count[intra]++;
3380
3381     for(i=0; i<64; i++){
3382         int level= block[i];
3383
3384         if(level){
3385             if(level>0){
3386                 s->dct_error_sum[intra][i] += level;
3387                 level -= s->dct_offset[intra][i];
3388                 if(level<0) level=0;
3389             }else{
3390                 s->dct_error_sum[intra][i] -= level;
3391                 level += s->dct_offset[intra][i];
3392                 if(level>0) level=0;
3393             }
3394             block[i]= level;
3395         }
3396     }
3397 }
3398
/**
 * Forward-transform and quantise one block, choosing the levels with a
 * rate-distortion (trellis) search.
 *
 * For every coefficient up to the last one that quantises to nonzero, up to
 * two candidate levels are considered (the rounded level and the level one
 * step closer to zero). A dynamic-programming pass over the scan positions
 * picks the run/level sequence minimising distortion + lambda * bits, where
 * the bit costs come from the intra/inter AC VLC length tables or
 * s->ac_esc_length for levels outside -64..63.
 *
 * @param s        encoder context
 * @param block    input samples; overwritten with the chosen levels
 * @param n        block index; n < 4 selects the luma DC scale and intra
 *                 matrix, otherwise the chroma ones; also indexes
 *                 s->coded_score[]
 * @param qscale   quantiser scale
 * @param overflow set to nonzero if a candidate level exceeded s->max_qcoeff
 * @return index in scan order of the last nonzero coefficient; a value below
 *         the first searched index (1 for intra, 0 for inter) means that no
 *         searched coefficient is coded
 */
3399 static int dct_quantize_trellis_c(MpegEncContext *s,
3400                                   DCTELEM *block, int n,
3401                                   int qscale, int *overflow){
3402     const int *qmat;
3403     const uint8_t *scantable= s->intra_scantable.scantable;
3404     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3405     int max=0;
3406     unsigned int threshold1, threshold2;
3407     int bias=0;
         /* run_tab/level_tab[i+1]: best run and level for a code ending at scan
          * position i; score_tab[i]: best score for coding everything before i */
3408     int run_tab[65];
3409     int level_tab[65];
3410     int score_tab[65];
         /* scan positions still worth considering as the start of a run */
3411     int survivor[65];
3412     int survivor_count;
3413     int last_run=0;
3414     int last_level=0;
3415     int last_score= 0;
3416     int last_i;
         /* candidate levels per scan position and how many of them are valid */
3417     int coeff[2][64];
3418     int coeff_count[64];
3419     int qmul, qadd, start_i, last_non_zero, i, dc;
3420     const int esc_length= s->ac_esc_length;
3421     uint8_t * length;
3422     uint8_t * last_length;
3423     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3424
3425     s->dsp.fdct (block);
3426
         /* denoising is applied only when the error-sum table is present */
3427     if(s->dct_error_sum)
3428         s->denoise_dct(s, block);
         /* constants of the H.263-style reconstruction alevel*qmul + qadd */
3429     qmul= qscale*16;
3430     qadd= ((qscale-1)|1)*8;
3431
3432     if (s->mb_intra) {
3433         int q;
3434         if (!s->h263_aic) {
3435             if (n < 4)
3436                 q = s->y_dc_scale;
3437             else
3438                 q = s->c_dc_scale;
3439             q = q << 3;
3440         } else{
3441             /* For AIC we skip quant/dequant of INTRADC */
3442             q = 1 << 3;
3443             qadd=0;
3444         }
3445
3446         /* note: block[0] is assumed to be positive */
3447         block[0] = (block[0] + (q >> 1)) / q;
             /* DC is quantised directly above; the search covers AC only */
3448         start_i = 1;
3449         last_non_zero = 0;
3450         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3451         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3452             bias= 1<<(QMAT_SHIFT-1);
3453         length     = s->intra_ac_vlc_length;
3454         last_length= s->intra_ac_vlc_last_length;
3455     } else {
3456         start_i = 0;
3457         last_non_zero = -1;
3458         qmat = s->q_inter_matrix[qscale];
3459         length     = s->inter_ac_vlc_length;
3460         last_length= s->inter_ac_vlc_last_length;
3461     }
3462     last_i= start_i;
3463
         /* (unsigned)(level+threshold1) > threshold2 is a single-comparison
          * test for level > threshold1 || level < -threshold1 */
3464     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3465     threshold2= (threshold1<<1);
3466
         /* scan backwards for the last coefficient that exceeds the threshold */
3467     for(i=63; i>=start_i; i--) {
3468         const int j = scantable[i];
3469         int level = block[j] * qmat[j];
3470
3471         if(((unsigned)(level+threshold1))>threshold2){
3472             last_non_zero = i;
3473             break;
3474         }
3475     }
3476
         /* build the candidate levels for every position up to last_non_zero */
3477     for(i=start_i; i<=last_non_zero; i++) {
3478         const int j = scantable[i];
3479         int level = block[j] * qmat[j];
3480
3481 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3482 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3483         if(((unsigned)(level+threshold1))>threshold2){
3484             if(level>0){
3485                 level= (bias + level)>>QMAT_SHIFT;
3486                 coeff[0][i]= level;
3487                 coeff[1][i]= level-1;
3488 //                coeff[2][k]= level-2;
3489             }else{
3490                 level= (bias - level)>>QMAT_SHIFT;
3491                 coeff[0][i]= -level;
3492                 coeff[1][i]= -level+1;
3493 //                coeff[2][k]= -level+2;
3494             }
                 /* a magnitude of 1 leaves one candidate: the second would be 0 */
3495             coeff_count[i]= FFMIN(level, 2);
3496             assert(coeff_count[i]);
3497             max |=level;
3498         }else{
                 /* below threshold: single candidate of magnitude 1 carrying the
                  * sign of level (presumably relies on an arithmetic right shift
                  * of a 32-bit int) */
3499             coeff[0][i]= (level>>31)|1;
3500             coeff_count[i]= 1;
3501         }
3502     }
3503
3504     *overflow= s->max_qcoeff < max; //overflow might have happened
3505
         /* nothing exceeded the threshold: clear the searched range and stop */
3506     if(last_non_zero < start_i){
3507         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3508         return last_non_zero;
3509     }
3510
3511     score_tab[start_i]= 0;
3512     survivor[0]= start_i;
3513     survivor_count= 1;
3514
         /* dynamic programming over the scan positions */
3515     for(i=start_i; i<=last_non_zero; i++){
3516         int level_index, j, zero_distortion;
3517         int dct_coeff= FFABS(block[ scantable[i] ]);
             /* large sentinel that still fits in an int */
3518         int best_score=256*256*256*120;
3519
             /* for these fdct implementations the coefficient is rescaled with
              * ff_inv_aanscales before the distortion is computed */
3520         if (   s->dsp.fdct == ff_fdct_ifast
3521 #ifndef FAAN_POSTSCALE
3522             || s->dsp.fdct == ff_faandct
3523 #endif
3524            )
3525             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3526         zero_distortion= dct_coeff*dct_coeff;
3527
3528         for(level_index=0; level_index < coeff_count[i]; level_index++){
3529             int distortion;
3530             int level= coeff[level_index][i];
3531             const int alevel= FFABS(level);
3532             int unquant_coeff;
3533
3534             assert(level);
3535
                 /* reconstruct the value this candidate would dequantise to */
3536             if(s->out_format == FMT_H263){
3537                 unquant_coeff= alevel*qmul + qadd;
3538             }else{ //MPEG1
3539                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3540                 if(s->mb_intra){
3541                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3542                         unquant_coeff =   (unquant_coeff - 1) | 1;
3543                 }else{
3544                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3545                         unquant_coeff =   (unquant_coeff - 1) | 1;
3546                 }
3547                 unquant_coeff<<= 3;
3548             }
3549
                 /* distortion relative to coding this coefficient as zero */
3550             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* bias the level so that -64..63 maps onto 0..127; only those
                  * levels are looked up in the length tables, the rest are
                  * charged esc_length */
3551             level+=64;
3552             if((level&(~127)) == 0){
3553                 for(j=survivor_count-1; j>=0; j--){
3554                     int run= i - survivor[j];
3555                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3556                     score += score_tab[i-run];
3557
3558                     if(score < best_score){
3559                         best_score= score;
3560                         run_tab[i+1]= run;
3561                         level_tab[i+1]= level-64;
3562                     }
3563                 }
3564
                     /* for FMT_H263 the final code has its own length table, so
                      * the best "last" choice is tracked during the search */
3565                 if(s->out_format == FMT_H263){
3566                     for(j=survivor_count-1; j>=0; j--){
3567                         int run= i - survivor[j];
3568                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3569                         score += score_tab[i-run];
3570                         if(score < last_score){
3571                             last_score= score;
3572                             last_run= run;
3573                             last_level= level-64;
3574                             last_i= i+1;
3575                         }
3576                     }
3577                 }
3578             }else{
3579                 distortion += esc_length*lambda;
3580                 for(j=survivor_count-1; j>=0; j--){
3581                     int run= i - survivor[j];
3582                     int score= distortion + score_tab[i-run];
3583
3584                     if(score < best_score){
3585                         best_score= score;
3586                         run_tab[i+1]= run;
3587                         level_tab[i+1]= level-64;
3588                     }
3589                 }
3590
3591                 if(s->out_format == FMT_H263){
3592                   for(j=survivor_count-1; j>=0; j--){
3593                         int run= i - survivor[j];
3594                         int score= distortion + score_tab[i-run];
3595                         if(score < last_score){
3596                             last_score= score;
3597                             last_run= run;
3598                             last_level= level-64;
3599                             last_i= i+1;
3600                         }
3601                     }
3602                 }
3603             }
3604         }
3605
3606         score_tab[i+1]= best_score;
3607
3608         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
             /* drop trailing survivors that score worse than the new best; when
              * last_non_zero > 27 a slack of lambda is allowed */
3609         if(last_non_zero <= 27){
3610             for(; survivor_count; survivor_count--){
3611                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3612                     break;
3613             }
3614         }else{
3615             for(; survivor_count; survivor_count--){
3616                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3617                     break;
3618             }
3619         }
3620
3621         survivor[ survivor_count++ ]= i+1;
3622     }
3623
         /* other formats pick the end position here, after the search */
3624     if(s->out_format != FMT_H263){
3625         last_score= 256*256*256*120;
3626         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3627             int score= score_tab[i];
3628             if(i) score += lambda*2; //FIXME exacter?
3629
3630             if(score < last_score){
3631                 last_score= score;
3632                 last_i= i;
3633                 last_level= level_tab[i];
3634                 last_run= run_tab[i];
3635             }
3636         }
3637     }
3638
3639     s->coded_score[n] = last_score;
3640
         /* remember |block[0]| before the searched range is cleared */
3641     dc= FFABS(block[0]);
3642     last_non_zero= last_i - 1;
3643     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3644
3645     if(last_non_zero < start_i)
3646         return last_non_zero;
3647
         /* inter block whose only coded coefficient is at scan position 0:
          * re-decide its level against coding nothing at all */
3648     if(last_non_zero == 0 && start_i == 0){
3649         int best_level= 0;
3650         int best_score= dc * dc;
3651
3652         for(i=0; i<coeff_count[0]; i++){
3653             int level= coeff[i][0];
3654             int alevel= FFABS(level);
3655             int unquant_coeff, score, distortion;
3656
3657             if(s->out_format == FMT_H263){
3658                     unquant_coeff= (alevel*qmul + qadd)>>3;
3659             }else{ //MPEG1
3660                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3661                     unquant_coeff =   (unquant_coeff - 1) | 1;
3662             }
3663             unquant_coeff = (unquant_coeff + 4) >> 3;
3664             unquant_coeff<<= 3 + 3;
3665
3666             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3667             level+=64;
3668             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3669             else                    score= distortion + esc_length*lambda;
3670
3671             if(score < best_score){
3672                 best_score= score;
3673                 best_level= level - 64;
3674             }
3675         }
3676         block[0]= best_level;
3677         s->coded_score[n] = best_score - dc*dc;
3678         if(best_level == 0) return -1;
3679         else                return last_non_zero;
3680     }
3681
         /* walk the chosen path backwards, storing each level at its permuted
          * scan position */
3682     i= last_i;
3683     assert(last_level);
3684
3685     block[ perm_scantable[last_non_zero] ]= last_level;
3686     i -= last_run + 1;
3687
3688     for(; i>start_i; i -= run_tab[i] + 1){
3689         block[ perm_scantable[i-1] ]= level_tab[i];
3690     }
3691
3692     return last_non_zero;
3693 }
3694
3695 //#define REFINE_STATS 1
3696 static int16_t basis[64][64];
3697
3698 static void build_basis(uint8_t *perm){
3699     int i, j, x, y;
3700     emms_c();
3701     for(i=0; i<8; i++){
3702         for(j=0; j<8; j++){
3703             for(y=0; y<8; y++){
3704                 for(x=0; x<8; x++){
3705                     double s= 0.25*(1<<BASIS_SHIFT);
3706                     int index= 8*i + j;
3707                     int perm_index= perm[index];
3708                     if(i==0) s*= sqrt(0.5);
3709                     if(j==0) s*= sqrt(0.5);
3710                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3711                 }
3712             }
3713         }
3714     }
3715 }
3716
3717 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3718                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3719                         int n, int qscale){
3720     int16_t rem[64];
3721     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3722     const uint8_t *scantable= s->intra_scantable.scantable;
3723     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3724 //    unsigned int threshold1, threshold2;
3725 //    int bias=0;
3726     int run_tab[65];
3727     int prev_run=0;
3728     int prev_level=0;
3729     int qmul, qadd, start_i, last_non_zero, i, dc;
3730     uint8_t * length;
3731     uint8_t * last_length;
3732     int lambda;
3733     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3734 #ifdef REFINE_STATS
3735 static int count=0;
3736 static int after_last=0;
3737 static int to_zero=0;
3738 static int from_zero=0;
3739 static int raise=0;
3740 static int lower=0;
3741 static int messed_sign=0;
3742 #endif
3743
3744     if(basis[0][0] == 0)
3745         build_basis(s->dsp.idct_permutation);
3746
3747     qmul= qscale*2;
3748     qadd= (qscale-1)|1;
3749     if (s->mb_intra) {
3750         if (!s->h263_aic) {
3751             if (n < 4)
3752                 q = s->y_dc_scale;
3753             else
3754                 q = s->c_dc_scale;
3755         } else{
3756             /* For AIC we skip quant/dequant of INTRADC */
3757             q = 1;
3758             qadd=0;
3759         }
3760         q <<= RECON_SHIFT-3;
3761         /* note: block[0] is assumed to be positive */
3762         dc= block[0]*q;
3763 //        block[0] = (block[0] + (q >> 1)) / q;
3764         start_i = 1;
3765 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3766 //            bias= 1<<(QMAT_SHIFT-1);
3767         length     = s->intra_ac_vlc_length;
3768         last_length= s->intra_ac_vlc_last_length;
3769     } else {
3770         dc= 0;
3771         start_i = 0;
3772         length     = s->inter_ac_vlc_length;
3773         last_length= s->inter_ac_vlc_last_length;
3774     }
3775     last_non_zero = s->block_last_index[n];
3776
3777 #ifdef REFINE_STATS
3778 {START_TIMER
3779 #endif
3780     dc += (1<<(RECON_SHIFT-1));
3781     for(i=0; i<64; i++){
3782         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3783     }
3784 #ifdef REFINE_STATS
3785 STOP_TIMER("memset rem[]")}
3786 #endif
3787     sum=0;
3788     for(i=0; i<64; i++){
3789         int one= 36;
3790         int qns=4;
3791         int w;
3792
3793         w= FFABS(weight[i]) + qns*one;
3794         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3795
3796         weight[i] = w;
3797 //        w=weight[i] = (63*qns + (w/2)) / w;
3798
3799         assert(w>0);
3800         assert(w<(1<<6));
3801         sum += w*w;
3802     }
3803     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3804 #ifdef REFINE_STATS
3805 {START_TIMER
3806 #endif
3807     run=0;
3808     rle_index=0;
3809     for(i=start_i; i<=last_non_zero; i++){
3810         int j= perm_scantable[i];
3811         const int level= block[j];
3812         int coeff;
3813
3814         if(level){
3815             if(level<0) coeff= qmul*level - qadd;
3816             else        coeff= qmul*level + qadd;
3817             run_tab[rle_index++]=run;
3818             run=0;
3819
3820             s->dsp.add_8x8basis(rem, basis[j], coeff);
3821         }else{
3822             run++;
3823         }
3824     }
3825 #ifdef REFINE_STATS
3826 if(last_non_zero>0){
3827 STOP_TIMER("init rem[]")
3828 }
3829 }
3830
3831 {START_TIMER
3832 #endif
3833     for(;;){
3834         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3835         int best_coeff=0;
3836         int best_change=0;
3837         int run2, best_unquant_change=0, analyze_gradient;
3838 #ifdef REFINE_STATS
3839 {START_TIMER
3840 #endif
3841         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3842
3843         if(analyze_gradient){
3844 #ifdef REFINE_STATS
3845 {START_TIMER
3846 #endif
3847             for(i=0; i<64; i++){
3848                 int w= weight[i];
3849
3850                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3851             }
3852 #ifdef REFINE_STATS
3853 STOP_TIMER("rem*w*w")}
3854 {START_TIMER
3855 #endif
3856             s->dsp.fdct(d1);
3857 #ifdef REFINE_STATS
3858 STOP_TIMER("dct")}
3859 #endif
3860         }
3861
3862         if(start_i){
3863             const int level= block[0];
3864             int change, old_coeff;
3865
3866             assert(s->mb_intra);
3867
3868             old_coeff= q*level;
3869
3870             for(change=-1; change<=1; change+=2){
3871                 int new_level= level + change;
3872                 int score, new_coeff;
3873
3874                 new_coeff= q*new_level;
3875                 if(new_coeff >= 2048 || new_coeff < 0)
3876                     continue;
3877
3878                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3879                 if(score<best_score){
3880                     best_score= score;
3881                     best_coeff= 0;
3882                     best_change= change;
3883                     best_unquant_change= new_coeff - old_coeff;
3884                 }
3885             }
3886         }
3887
3888         run=0;
3889         rle_index=0;
3890         run2= run_tab[rle_index++];
3891         prev_level=0;
3892         prev_run=0;
3893
3894         for(i=start_i; i<64; i++){
3895             int j= perm_scantable[i];
3896             const int level= block[j];
3897             int change, old_coeff;
3898
3899             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3900                 break;
3901
3902             if(level){
3903                 if(level<0) old_coeff= qmul*level - qadd;
3904                 else        old_coeff= qmul*level + qadd;
3905                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3906             }else{
3907                 old_coeff=0;
3908                 run2--;
3909                 assert(run2>=0 || i >= last_non_zero );
3910             }
3911
3912             for(change=-1; change<=1; change+=2){
3913                 int new_level= level + change;
3914                 int score, new_coeff, unquant_change;
3915
3916                 score=0;
3917                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3918                    continue;
3919
3920                 if(new_level){
3921                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3922                     else            new_coeff= qmul*new_level + qadd;
3923                     if(new_coeff >= 2048 || new_coeff <= -2048)
3924                         continue;
3925                     //FIXME check for overflow
3926
3927                     if(level){
3928                         if(level < 63 && level > -63){
3929                             if(i < last_non_zero)
3930                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3931                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3932                             else
3933                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3934                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3935                         }
3936                     }else{
3937                         assert(FFABS(new_level)==1);
3938
3939                         if(analyze_gradient){
3940                             int g= d1[ scantable[i] ];
3941                             if(g && (g^new_level) >= 0)
3942                                 continue;
3943                         }
3944
3945                         if(i < last_non_zero){
3946                             int next_i= i + run2 + 1;
3947                             int next_level= block[ perm_scantable[next_i] ] + 64;
3948
3949                             if(next_level&(~127))
3950                                 next_level= 0;
3951
3952                             if(next_i < last_non_zero)
3953                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3954                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3955                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3956                             else
3957                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3958                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3959                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3960                         }else{
3961                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3962                             if(prev_level){
3963                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3964                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3965                             }
3966                         }
3967                     }
3968                 }else{
3969                     new_coeff=0;
3970                     assert(FFABS(level)==1);
3971
3972                     if(i < last_non_zero){
3973                         int next_i= i + run2 + 1;
3974                         int next_level= block[ perm_scantable[next_i] ] + 64;
3975
3976                         if(next_level&(~127))
3977                             next_level= 0;
3978
3979                         if(next_i < last_non_zero)
3980                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3981                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3982                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3983                         else
3984                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3985                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3986                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3987                     }else{
3988                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3989                         if(prev_level){
3990                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3991                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3992                         }
3993                     }
3994                 }
3995
3996                 score *= lambda;
3997
3998                 unquant_change= new_coeff - old_coeff;
3999                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4000
4001                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4002                 if(score<best_score){
4003                     best_score= score;
4004                     best_coeff= i;
4005                     best_change= change;
4006                     best_unquant_change= unquant_change;
4007                 }
4008             }
4009             if(level){
4010                 prev_level= level + 64;
4011                 if(prev_level&(~127))
4012                     prev_level= 0;
4013                 prev_run= run;
4014                 run=0;
4015             }else{
4016                 run++;
4017             }
4018         }
4019 #ifdef REFINE_STATS
4020 STOP_TIMER("iterative step")}
4021 #endif
4022
4023         if(best_change){
4024             int j= perm_scantable[ best_coeff ];
4025
4026             block[j] += best_change;
4027
4028             if(best_coeff > last_non_zero){
4029                 last_non_zero= best_coeff;
4030                 assert(block[j]);
4031 #ifdef REFINE_STATS
4032 after_last++;
4033 #endif
4034             }else{
4035 #ifdef REFINE_STATS
4036 if(block[j]){
4037     if(block[j] - best_change){
4038         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4039             raise++;
4040         }else{
4041             lower++;
4042         }
4043     }else{
4044         from_zero++;
4045     }
4046 }else{
4047     to_zero++;
4048 }
4049 #endif
4050                 for(; last_non_zero>=start_i; last_non_zero--){
4051                     if(block[perm_scantable[last_non_zero]])
4052                         break;
4053                 }
4054             }
4055 #ifdef REFINE_STATS
4056 count++;
4057 if(256*256*256*64 % count == 0){
4058     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4059 }
4060 #endif
4061             run=0;
4062             rle_index=0;
4063             for(i=start_i; i<=last_non_zero; i++){
4064                 int j= perm_scantable[i];
4065                 const int level= block[j];
4066
4067                  if(level){
4068                      run_tab[rle_index++]=run;
4069                      run=0;
4070                  }else{
4071                      run++;
4072                  }
4073             }
4074
4075             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4076         }else{
4077             break;
4078         }
4079     }
4080 #ifdef REFINE_STATS
4081 if(last_non_zero>0){
4082 STOP_TIMER("iterative search")
4083 }
4084 }
4085 #endif
4086
4087     return last_non_zero;
4088 }
4089
4090 int ff_dct_quantize_c(MpegEncContext *s,
4091                         DCTELEM *block, int n,
4092                         int qscale, int *overflow)
4093 {
4094     int i, j, level, last_non_zero, q, start_i;
4095     const int *qmat;
4096     const uint8_t *scantable= s->intra_scantable.scantable;
4097     int bias;
4098     int max=0;
4099     unsigned int threshold1, threshold2;
4100
4101     s->dsp.fdct (block);
4102
4103     if(s->dct_error_sum)
4104         s->denoise_dct(s, block);
4105
4106     if (s->mb_intra) {
4107         if (!s->h263_aic) {
4108             if (n < 4)
4109                 q = s->y_dc_scale;
4110             else
4111                 q = s->c_dc_scale;
4112             q = q << 3;
4113         } else
4114             /* For AIC we skip quant/dequant of INTRADC */
4115             q = 1 << 3;
4116
4117         /* note: block[0] is assumed to be positive */
4118         block[0] = (block[0] + (q >> 1)) / q;
4119         start_i = 1;
4120         last_non_zero = 0;
4121         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4122         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4123     } else {
4124         start_i = 0;
4125         last_non_zero = -1;
4126         qmat = s->q_inter_matrix[qscale];
4127         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4128     }
4129     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4130     threshold2= (threshold1<<1);
4131     for(i=63;i>=start_i;i--) {
4132         j = scantable[i];
4133         level = block[j] * qmat[j];
4134
4135         if(((unsigned)(level+threshold1))>threshold2){
4136             last_non_zero = i;
4137             break;
4138         }else{
4139             block[j]=0;
4140         }
4141     }
4142     for(i=start_i; i<=last_non_zero; i++) {
4143         j = scantable[i];
4144         level = block[j] * qmat[j];
4145
4146 //        if(   bias+level >= (1<<QMAT_SHIFT)
4147 //           || bias-level >= (1<<QMAT_SHIFT)){
4148         if(((unsigned)(level+threshold1))>threshold2){
4149             if(level>0){
4150                 level= (bias + level)>>QMAT_SHIFT;
4151                 block[j]= level;
4152             }else{
4153                 level= (bias - level)>>QMAT_SHIFT;
4154                 block[j]= -level;
4155             }
4156             max |=level;
4157         }else{
4158             block[j]=0;
4159         }
4160     }
4161     *overflow= s->max_qcoeff < max; //overflow might have happened
4162
4163     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4164     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4165         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4166
4167     return last_non_zero;
4168 }
4169
/* Byte offset of field x inside MpegEncContext, for the AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags shared by the tables below: video + encoding parameter.
 * Parenthesized so the expansion stays one operand next to other operators. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4172 static const AVOption h263_options[] = {
4173     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4174     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4175     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { 0 }, 0, INT_MAX, VE },
4176     FF_MPV_COMMON_OPTS
4177     { NULL },
4178 };
4179
4180 static const AVClass h263_class = {
4181     .class_name = "H.263 encoder",
4182     .item_name  = av_default_item_name,
4183     .option     = h263_options,
4184     .version    = LIBAVUTIL_VERSION_INT,
4185 };
4186
4187 AVCodec ff_h263_encoder = {
4188     .name           = "h263",
4189     .type           = AVMEDIA_TYPE_VIDEO,
4190     .id             = CODEC_ID_H263,
4191     .priv_data_size = sizeof(MpegEncContext),
4192     .init           = ff_MPV_encode_init,
4193     .encode2        = ff_MPV_encode_picture,
4194     .close          = ff_MPV_encode_end,
4195     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4196     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4197     .priv_class     = &h263_class,
4198 };
4199
4200 static const AVOption h263p_options[] = {
4201     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4202     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4203     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4204     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4205     FF_MPV_COMMON_OPTS
4206     { NULL },
4207 };
4208 static const AVClass h263p_class = {
4209     .class_name = "H.263p encoder",
4210     .item_name  = av_default_item_name,
4211     .option     = h263p_options,
4212     .version    = LIBAVUTIL_VERSION_INT,
4213 };
4214
4215 AVCodec ff_h263p_encoder = {
4216     .name           = "h263p",
4217     .type           = AVMEDIA_TYPE_VIDEO,
4218     .id             = CODEC_ID_H263P,
4219     .priv_data_size = sizeof(MpegEncContext),
4220     .init           = ff_MPV_encode_init,
4221     .encode2        = ff_MPV_encode_picture,
4222     .close          = ff_MPV_encode_end,
4223     .capabilities = CODEC_CAP_SLICE_THREADS,
4224     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4225     .long_name= NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4226     .priv_class     = &h263p_class,
4227 };
4228
4229 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4230
4231 AVCodec ff_msmpeg4v2_encoder = {
4232     .name           = "msmpeg4v2",
4233     .type           = AVMEDIA_TYPE_VIDEO,
4234     .id             = CODEC_ID_MSMPEG4V2,
4235     .priv_data_size = sizeof(MpegEncContext),
4236     .init           = ff_MPV_encode_init,
4237     .encode2        = ff_MPV_encode_picture,
4238     .close          = ff_MPV_encode_end,
4239     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4240     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4241     .priv_class     = &msmpeg4v2_class,
4242 };
4243
4244 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4245
4246 AVCodec ff_msmpeg4v3_encoder = {
4247     .name           = "msmpeg4",
4248     .type           = AVMEDIA_TYPE_VIDEO,
4249     .id             = CODEC_ID_MSMPEG4V3,
4250     .priv_data_size = sizeof(MpegEncContext),
4251     .init           = ff_MPV_encode_init,
4252     .encode2        = ff_MPV_encode_picture,
4253     .close          = ff_MPV_encode_end,
4254     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4255     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4256     .priv_class     = &msmpeg4v3_class,
4257 };
4258
4259 FF_MPV_GENERIC_CLASS(wmv1)
4260
4261 AVCodec ff_wmv1_encoder = {
4262     .name           = "wmv1",
4263     .type           = AVMEDIA_TYPE_VIDEO,
4264     .id             = CODEC_ID_WMV1,
4265     .priv_data_size = sizeof(MpegEncContext),
4266     .init           = ff_MPV_encode_init,
4267     .encode2        = ff_MPV_encode_picture,
4268     .close          = ff_MPV_encode_end,
4269     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4270     .long_name= NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4271     .priv_class     = &wmv1_class,
4272 };