1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "mpegvideo_common.h"
37 #include "h263.h"
38 #include "mjpegenc.h"
39 #include "msmpeg4.h"
40 #include "faandct.h"
41 #include "thread.h"
42 #include "aandcttab.h"
43 #include "flv.h"
44 #include "mpeg4video.h"
45 #include "internal.h"
46 #include <limits.h>
47
48 //#undef NDEBUG
49 //#include <assert.h>
50
51 static int encode_picture(MpegEncContext *s, int picture_number);
52 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
53 static int sse_mb(MpegEncContext *s);
54 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
55 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
56
57 /* enable all paranoid tests for rounding, overflows, etc... */
58 //#define PARANOID
59
60 //#define DEBUG
61
62 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
63 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
64
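/**
 * Descriptive note: precompute the quantizer multiplier tables for every
 * qscale in [qmin, qmax] so that the quantizers can work with a multiply and
 * a shift. qmat[] holds (1 << QMAT_SHIFT) / (qscale * quant_matrix), with the
 * ff_aanscales factor folded in for the pre-scaled AAN-style DCTs, and
 * qmat16[] holds the 16-bit reciprocal plus rounding-bias pair used by the
 * QMAT_SHIFT_MMX code path. The trailing loop only determines whether a
 * warning about possible overflow is printed; it does not alter the tables.
 */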
65 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
66                        uint16_t (*qmat16)[2][64],
67                        const uint16_t *quant_matrix,
68                        int bias, int qmin, int qmax, int intra)
69 {
70     int qscale;
71     int shift = 0;
72
73     for (qscale = qmin; qscale <= qmax; qscale++) {
74         int i;
75         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
76             dsp->fdct == ff_jpeg_fdct_islow_10
77 #ifdef FAAN_POSTSCALE
78             || dsp->fdct == ff_faandct
79 #endif
80             ) {
81             for (i = 0; i < 64; i++) {
82                 const int j = dsp->idct_permutation[i];
83                 /* 16 <= qscale * quant_matrix[i] <= 7905
 84                  * Assume x = qscale * quant_matrix[i]
 85                  * So              16 <=              x  <= 7905
 86                  * so (1 << QMAT_SHIFT) / 16 >= (1 << QMAT_SHIFT) / (x) >= (1 << QMAT_SHIFT) / 7905
 87                  * (ff_aanscales is not folded in here, this fdct is not pre-scaled) */
88
89                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
90                                         (qscale * quant_matrix[j]));
91             }
92         } else if (dsp->fdct == fdct_ifast
93 #ifndef FAAN_POSTSCALE
94                    || dsp->fdct == ff_faandct
95 #endif
96                    ) {
97             for (i = 0; i < 64; i++) {
98                 const int j = dsp->idct_permutation[i];
99                 /* 16 <= qscale * quant_matrix[i] <= 7905
100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
101                  *             19952 <=              x  <= 249205026
102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
103                  *           3444240 >= (1 << 36) / (x) >= 275 */
104
105                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
106                                         (ff_aanscales[i] * qscale *
107                                          quant_matrix[j]));
108             }
109         } else {
110             for (i = 0; i < 64; i++) {
111                 const int j = dsp->idct_permutation[i];
112                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
113                  * Assume x = qscale * quant_matrix[i]
114                  * So             16 <=              x  <= 7905
115                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
116                  * so          32768 >= (1 << 19) / (x) >= 67 */
117                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
118                                         (qscale * quant_matrix[j]));
119                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
120                 //                    (qscale * quant_matrix[i]);
121                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
122                                        (qscale * quant_matrix[j]);
123
124                 if (qmat16[qscale][0][i] == 0 ||
125                     qmat16[qscale][0][i] == 128 * 256)
126                     qmat16[qscale][0][i] = 128 * 256 - 1;
127                 qmat16[qscale][1][i] =
128                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
129                                 qmat16[qscale][0][i]);
130             }
131         }
132
133         for (i = intra; i < 64; i++) {
134             int64_t max = 8191;
135             if (dsp->fdct == fdct_ifast
136 #ifndef FAAN_POSTSCALE
137                 || dsp->fdct == ff_faandct
138 #endif
139                ) {
140                 max = (8191LL * ff_aanscales[i]) >> 14;
141             }
142             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
143                 shift++;
144             }
145         }
146     }
147     if (shift) {
148         av_log(NULL, AV_LOG_INFO,
149                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
150                QMAT_SHIFT - shift);
151     }
152 }
153
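/**
 * Descriptive note: derive the frame qscale from the current lambda. Since
 * 139 / 2^(FF_LAMBDA_SHIFT + 7) is roughly 1 / FF_QP2LAMBDA, this computes
 * qscale ~= lambda / FF_QP2LAMBDA rounded to nearest, clipped to
 * [qmin, qmax]; lambda2 is lambda squared, renormalized by FF_LAMBDA_SHIFT,
 * for the squared-error based decisions.
 */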
154 static inline void update_qscale(MpegEncContext *s)
155 {
156     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
157                 (FF_LAMBDA_SHIFT + 7);
158     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
159
160     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
161                  FF_LAMBDA_SHIFT;
162 }
163
164 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
165 {
166     int i;
167
168     if (matrix) {
169         put_bits(pb, 1, 1);
170         for (i = 0; i < 64; i++) {
171             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
172         }
173     } else
174         put_bits(pb, 1, 0);
175 }
176
177 /**
178  * init s->current_picture.qscale_table from s->lambda_table
179  */
180 void ff_init_qscale_tab(MpegEncContext *s)
181 {
182     int8_t * const qscale_table = s->current_picture.f.qscale_table;
183     int i;
184
185     for (i = 0; i < s->mb_num; i++) {
186         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
187         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
188         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
189                                                   s->avctx->qmax);
190     }
191 }
192
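/**
 * Descriptive note: copy the per-frame metadata (picture type, quality,
 * picture numbers, pts, interlacing flags) from the user-supplied frame to
 * the internal one. When me_threshold is set, the caller is also expected to
 * provide motion vectors, macroblock types and reference indices, which are
 * copied here so the encoder can reuse them.
 */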
193 static void copy_picture_attributes(MpegEncContext *s,
194                                     AVFrame *dst,
195                                     AVFrame *src)
196 {
197     int i;
198
199     dst->pict_type              = src->pict_type;
200     dst->quality                = src->quality;
201     dst->coded_picture_number   = src->coded_picture_number;
202     dst->display_picture_number = src->display_picture_number;
203     //dst->reference              = src->reference;
204     dst->pts                    = src->pts;
205     dst->interlaced_frame       = src->interlaced_frame;
206     dst->top_field_first        = src->top_field_first;
207
208     if (s->avctx->me_threshold) {
209         if (!src->motion_val[0])
210             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
211         if (!src->mb_type)
212             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
213         if (!src->ref_index[0])
214             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
215         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
216             av_log(s->avctx, AV_LOG_ERROR,
217                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
218                    src->motion_subsample_log2, dst->motion_subsample_log2);
219
220         memcpy(dst->mb_type, src->mb_type,
221                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
222
223         for (i = 0; i < 2; i++) {
224             int stride = ((16 * s->mb_width ) >>
225                           src->motion_subsample_log2) + 1;
226             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
227
228             if (src->motion_val[i] &&
229                 src->motion_val[i] != dst->motion_val[i]) {
230                 memcpy(dst->motion_val[i], src->motion_val[i],
231                        2 * stride * height * sizeof(int16_t));
232             }
233             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
234                 memcpy(dst->ref_index[i], src->ref_index[i],
235                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
236             }
237         }
238     }
239 }
240
241 static void update_duplicate_context_after_me(MpegEncContext *dst,
242                                               MpegEncContext *src)
243 {
244 #define COPY(a) dst->a= src->a
245     COPY(pict_type);
246     COPY(current_picture);
247     COPY(f_code);
248     COPY(b_code);
249     COPY(qscale);
250     COPY(lambda);
251     COPY(lambda2);
252     COPY(picture_in_gop_number);
253     COPY(gop_picture_number);
254     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
255     COPY(progressive_frame);    // FIXME don't set in encode_header
256     COPY(partitioned_frame);    // FIXME don't set in encode_header
257 #undef COPY
258 }
259
260 /**
261  * Set the given MpegEncContext to defaults for encoding.
262  * The changed fields will not depend upon the prior state of the MpegEncContext.
263  */
264 static void MPV_encode_defaults(MpegEncContext *s)
265 {
266     int i;
267     MPV_common_defaults(s);
268
269     for (i = -16; i < 16; i++) {
270         default_fcode_tab[i + MAX_MV] = 1;
271     }
272     s->me.mv_penalty = default_mv_penalty;
273     s->fcode_tab     = default_fcode_tab;
274 }
275
276 /* init video encoder */
277 av_cold int MPV_encode_init(AVCodecContext *avctx)
278 {
279     MpegEncContext *s = avctx->priv_data;
280     int i;
281     int chroma_h_shift, chroma_v_shift;
282
283     MPV_encode_defaults(s);
284
285     switch (avctx->codec_id) {
286     case CODEC_ID_MPEG2VIDEO:
287         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
288             avctx->pix_fmt != PIX_FMT_YUV422P) {
289             av_log(avctx, AV_LOG_ERROR,
290                    "only YUV420 and YUV422 are supported\n");
291             return -1;
292         }
293         break;
294     case CODEC_ID_LJPEG:
295         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
296             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
297             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
298             avctx->pix_fmt != PIX_FMT_BGRA     &&
299             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
300               avctx->pix_fmt != PIX_FMT_YUV422P &&
301               avctx->pix_fmt != PIX_FMT_YUV444P) ||
302              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
303             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
304             return -1;
305         }
306         break;
307     case CODEC_ID_MJPEG:
308         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
309             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
310             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
311               avctx->pix_fmt != PIX_FMT_YUV422P) ||
312              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
313             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
314             return -1;
315         }
316         break;
317     default:
318         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
319             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
320             return -1;
321         }
322     }
323
324     switch (avctx->pix_fmt) {
325     case PIX_FMT_YUVJ422P:
326     case PIX_FMT_YUV422P:
327         s->chroma_format = CHROMA_422;
328         break;
329     case PIX_FMT_YUVJ420P:
330     case PIX_FMT_YUV420P:
331     default:
332         s->chroma_format = CHROMA_420;
333         break;
334     }
335
336     s->bit_rate = avctx->bit_rate;
337     s->width    = avctx->width;
338     s->height   = avctx->height;
339     if (avctx->gop_size > 600 &&
340         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
341         av_log(avctx, AV_LOG_ERROR,
342                "Warning keyframe interval too large! reducing it ...\n");
343         avctx->gop_size = 600;
344     }
345     s->gop_size     = avctx->gop_size;
346     s->avctx        = avctx;
347     s->flags        = avctx->flags;
348     s->flags2       = avctx->flags2;
349     s->max_b_frames = avctx->max_b_frames;
350     s->codec_id     = avctx->codec->id;
351     s->luma_elim_threshold   = avctx->luma_elim_threshold;
352     s->chroma_elim_threshold = avctx->chroma_elim_threshold;
353     s->strict_std_compliance = avctx->strict_std_compliance;
354     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
355     s->mpeg_quant         = avctx->mpeg_quant;
356     s->rtp_mode           = !!avctx->rtp_payload_size;
357     s->intra_dc_precision = avctx->intra_dc_precision;
358     s->user_specified_pts = AV_NOPTS_VALUE;
359
360     if (s->gop_size <= 1) {
361         s->intra_only = 1;
362         s->gop_size   = 12;
363     } else {
364         s->intra_only = 0;
365     }
366
367     s->me_method = avctx->me_method;
368
369     /* Fixed QSCALE */
370     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
371
372     s->adaptive_quant = (s->avctx->lumi_masking ||
373                          s->avctx->dark_masking ||
374                          s->avctx->temporal_cplx_masking ||
375                          s->avctx->spatial_cplx_masking  ||
376                          s->avctx->p_masking      ||
377                          s->avctx->border_masking ||
378                          (s->flags & CODEC_FLAG_QP_RD)) &&
379                         !s->fixed_qscale;
380
381     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
382
383     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
384         av_log(avctx, AV_LOG_ERROR,
385                "a vbv buffer size is needed, "
386                "for encoding with a maximum bitrate\n");
387         return -1;
388     }
389
390     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
391         av_log(avctx, AV_LOG_INFO,
392                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
393     }
394
395     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
396         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
397         return -1;
398     }
399
400     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
401         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
402         return -1;
403     }
404
405     if (avctx->rc_max_rate &&
406         avctx->rc_max_rate == avctx->bit_rate &&
407         avctx->rc_max_rate != avctx->rc_min_rate) {
408         av_log(avctx, AV_LOG_INFO,
409                "impossible bitrate constraints, this will fail\n");
410     }
411
412     if (avctx->rc_buffer_size &&
413         avctx->bit_rate * (int64_t)avctx->time_base.num >
414             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
415         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
416         return -1;
417     }
418
419     if (!s->fixed_qscale &&
420         avctx->bit_rate * av_q2d(avctx->time_base) >
421             avctx->bit_rate_tolerance) {
422         av_log(avctx, AV_LOG_ERROR,
423                "bitrate tolerance too small for bitrate\n");
424         return -1;
425     }
426
427     if (s->avctx->rc_max_rate &&
428         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
429         (s->codec_id == CODEC_ID_MPEG1VIDEO ||
430          s->codec_id == CODEC_ID_MPEG2VIDEO) &&
431         90000LL * (avctx->rc_buffer_size - 1) >
432             s->avctx->rc_max_rate * 0xFFFFLL) {
433         av_log(avctx, AV_LOG_INFO,
434                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
435                "specified vbv buffer is too large for the given bitrate!\n");
436     }
437
438     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != CODEC_ID_MPEG4 &&
439         s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P &&
440         s->codec_id != CODEC_ID_FLV1) {
441         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
442         return -1;
443     }
444
445     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
446         av_log(avctx, AV_LOG_ERROR,
447                "OBMC is only supported with simple mb decision\n");
448         return -1;
449     }
450
451     if (s->quarter_sample && s->codec_id != CODEC_ID_MPEG4) {
452         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
453         return -1;
454     }
455
456     if (s->max_b_frames                    &&
457         s->codec_id != CODEC_ID_MPEG4      &&
458         s->codec_id != CODEC_ID_MPEG1VIDEO &&
459         s->codec_id != CODEC_ID_MPEG2VIDEO) {
460         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
461         return -1;
462     }
463
464     if ((s->codec_id == CODEC_ID_MPEG4 ||
465          s->codec_id == CODEC_ID_H263  ||
466          s->codec_id == CODEC_ID_H263P) &&
467         (avctx->sample_aspect_ratio.num > 255 ||
468          avctx->sample_aspect_ratio.den > 255)) {
469         av_log(avctx, AV_LOG_ERROR,
470                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
471                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
472         return -1;
473     }
474
475     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
476         s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO) {
477         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
478         return -1;
479     }
480
481     // FIXME mpeg2 uses that too
482     if (s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4) {
483         av_log(avctx, AV_LOG_ERROR,
484                "mpeg2 style quantization not supported by codec\n");
485         return -1;
486     }
487
488     if ((s->flags & CODEC_FLAG_CBP_RD) && !avctx->trellis) {
489         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
490         return -1;
491     }
492
493     if ((s->flags & CODEC_FLAG_QP_RD) &&
494         s->avctx->mb_decision != FF_MB_DECISION_RD) {
495         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
496         return -1;
497     }
498
499     if (s->avctx->scenechange_threshold < 1000000000 &&
500         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
501         av_log(avctx, AV_LOG_ERROR,
502                "closed gop with scene change detection are not supported yet, "
503                "set threshold to 1000000000\n");
504         return -1;
505     }
506
507     if (s->flags & CODEC_FLAG_LOW_DELAY) {
508         if (s->codec_id != CODEC_ID_MPEG2VIDEO) {
509             av_log(avctx, AV_LOG_ERROR,
510                   "low delay forcing is only available for mpeg2\n");
511             return -1;
512         }
513         if (s->max_b_frames != 0) {
514             av_log(avctx, AV_LOG_ERROR,
515                    "b frames cannot be used with low delay\n");
516             return -1;
517         }
518     }
519
520     if (s->q_scale_type == 1) {
521         if (avctx->qmax > 12) {
522             av_log(avctx, AV_LOG_ERROR,
523                    "non linear quant only supports qmax <= 12 currently\n");
524             return -1;
525         }
526     }
527
528     if (s->avctx->thread_count > 1         &&
529         s->codec_id != CODEC_ID_MPEG4      &&
530         s->codec_id != CODEC_ID_MPEG1VIDEO &&
531         s->codec_id != CODEC_ID_MPEG2VIDEO &&
532         (s->codec_id != CODEC_ID_H263P)) {
533         av_log(avctx, AV_LOG_ERROR,
534                "multi threaded encoding not supported by codec\n");
535         return -1;
536     }
537
538     if (s->avctx->thread_count < 1) {
539         av_log(avctx, AV_LOG_ERROR,
540                "automatic thread number detection not supported by codec, "
541                "patch welcome\n");
542         return -1;
543     }
544
545     if (s->avctx->thread_count > 1)
546         s->rtp_mode = 1;
547
548     if (!avctx->time_base.den || !avctx->time_base.num) {
549         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
550         return -1;
551     }
552
553     i = (INT_MAX / 2 + 128) >> 8;
554     if (avctx->me_threshold >= i) {
555         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
556                i - 1);
557         return -1;
558     }
559     if (avctx->mb_threshold >= i) {
560         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
561                i - 1);
562         return -1;
563     }
564
565     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
566         av_log(avctx, AV_LOG_INFO,
567                "notice: b_frame_strategy only affects the first pass\n");
568         avctx->b_frame_strategy = 0;
569     }
570
571     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
572     if (i > 1) {
573         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
574         avctx->time_base.den /= i;
575         avctx->time_base.num /= i;
576         //return -1;
577     }
578
579     if (s->mpeg_quant || s->codec_id == CODEC_ID_MPEG1VIDEO ||
580         s->codec_id == CODEC_ID_MPEG2VIDEO || s->codec_id == CODEC_ID_MJPEG) {
581         // (a + x * 3 / 8) / x
582         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
583         s->inter_quant_bias = 0;
584     } else {
585         s->intra_quant_bias = 0;
586         // (a - x / 4) / x
587         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
588     }
589
590     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
591         s->intra_quant_bias = avctx->intra_quant_bias;
592     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
593         s->inter_quant_bias = avctx->inter_quant_bias;
594
595     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
596                                   &chroma_v_shift);
597
598     if (avctx->codec_id == CODEC_ID_MPEG4 &&
599         s->avctx->time_base.den > (1 << 16) - 1) {
600         av_log(avctx, AV_LOG_ERROR,
601                "timebase %d/%d not supported by MPEG 4 standard, "
602                "the maximum admitted value for the timebase denominator "
603                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
604                (1 << 16) - 1);
605         return -1;
606     }
607     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
608
609     switch (avctx->codec->id) {
610     case CODEC_ID_MPEG1VIDEO:
611         s->out_format = FMT_MPEG1;
612         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
613         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
614         break;
615     case CODEC_ID_MPEG2VIDEO:
616         s->out_format = FMT_MPEG1;
617         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
618         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
619         s->rtp_mode   = 1;
620         break;
621     case CODEC_ID_LJPEG:
622     case CODEC_ID_MJPEG:
623         s->out_format = FMT_MJPEG;
624         s->intra_only = 1; /* force intra only for jpeg */
625         if (avctx->codec->id == CODEC_ID_LJPEG &&
626             avctx->pix_fmt   == PIX_FMT_BGRA) {
627             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
628             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
629             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
630         } else {
631             s->mjpeg_vsample[0] = 2;
632             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
633             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
634             s->mjpeg_hsample[0] = 2;
635             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
636             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
637         }
638         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
639             ff_mjpeg_encode_init(s) < 0)
640             return -1;
641         avctx->delay = 0;
642         s->low_delay = 1;
643         break;
644     case CODEC_ID_H261:
645         if (!CONFIG_H261_ENCODER)
646             return -1;
647         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
648             av_log(avctx, AV_LOG_ERROR,
649                    "The specified picture size of %dx%d is not valid for the "
650                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
651                     s->width, s->height);
652             return -1;
653         }
654         s->out_format = FMT_H261;
655         avctx->delay  = 0;
656         s->low_delay  = 1;
657         break;
658     case CODEC_ID_H263:
659         if (!CONFIG_H263_ENCODER)
660             return -1;
661         if (ff_match_2uint16(h263_format, FF_ARRAY_ELEMS(h263_format),
662                              s->width, s->height) == 8) {
663             av_log(avctx, AV_LOG_INFO,
664                    "The specified picture size of %dx%d is not valid for "
665                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
666                    "352x288, 704x576, and 1408x1152."
667                    "Try H.263+.\n", s->width, s->height);
668             return -1;
669         }
670         s->out_format = FMT_H263;
671         avctx->delay  = 0;
672         s->low_delay  = 1;
673         break;
674     case CODEC_ID_H263P:
675         s->out_format = FMT_H263;
676         s->h263_plus  = 1;
677         /* Fx */
678         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
679         s->modified_quant  = s->h263_aic;
680         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
681         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
682
683         /* /Fx */
684         /* These are just to be sure */
685         avctx->delay = 0;
686         s->low_delay = 1;
687         break;
688     case CODEC_ID_FLV1:
689         s->out_format      = FMT_H263;
690         s->h263_flv        = 2; /* format = 1; 11-bit codes */
691         s->unrestricted_mv = 1;
692         s->rtp_mode  = 0; /* don't allow GOB */
693         avctx->delay = 0;
694         s->low_delay = 1;
695         break;
696     case CODEC_ID_RV10:
697         s->out_format = FMT_H263;
698         avctx->delay  = 0;
699         s->low_delay  = 1;
700         break;
701     case CODEC_ID_RV20:
702         s->out_format      = FMT_H263;
703         avctx->delay       = 0;
704         s->low_delay       = 1;
705         s->modified_quant  = 1;
706         s->h263_aic        = 1;
707         s->h263_plus       = 1;
708         s->loop_filter     = 1;
709         s->unrestricted_mv = 0;
710         break;
711     case CODEC_ID_MPEG4:
712         s->out_format      = FMT_H263;
713         s->h263_pred       = 1;
714         s->unrestricted_mv = 1;
715         s->low_delay       = s->max_b_frames ? 0 : 1;
716         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
717         break;
718     case CODEC_ID_MSMPEG4V2:
719         s->out_format      = FMT_H263;
720         s->h263_pred       = 1;
721         s->unrestricted_mv = 1;
722         s->msmpeg4_version = 2;
723         avctx->delay       = 0;
724         s->low_delay       = 1;
725         break;
726     case CODEC_ID_MSMPEG4V3:
727         s->out_format        = FMT_H263;
728         s->h263_pred         = 1;
729         s->unrestricted_mv   = 1;
730         s->msmpeg4_version   = 3;
731         s->flipflop_rounding = 1;
732         avctx->delay         = 0;
733         s->low_delay         = 1;
734         break;
735     case CODEC_ID_WMV1:
736         s->out_format        = FMT_H263;
737         s->h263_pred         = 1;
738         s->unrestricted_mv   = 1;
739         s->msmpeg4_version   = 4;
740         s->flipflop_rounding = 1;
741         avctx->delay         = 0;
742         s->low_delay         = 1;
743         break;
744     case CODEC_ID_WMV2:
745         s->out_format        = FMT_H263;
746         s->h263_pred         = 1;
747         s->unrestricted_mv   = 1;
748         s->msmpeg4_version   = 5;
749         s->flipflop_rounding = 1;
750         avctx->delay         = 0;
751         s->low_delay         = 1;
752         break;
753     default:
754         return -1;
755     }
756
757     avctx->has_b_frames = !s->low_delay;
758
759     s->encoding = 1;
760
761     s->progressive_frame    =
762     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
763                                                 CODEC_FLAG_INTERLACED_ME) ||
764                                 s->alternate_scan);
765
766     /* init */
767     if (MPV_common_init(s) < 0)
768         return -1;
769
770     if (!s->dct_quantize)
771         s->dct_quantize = dct_quantize_c;
772     if (!s->denoise_dct)
773         s->denoise_dct  = denoise_dct_c;
774     s->fast_dct_quantize = s->dct_quantize;
775     if (avctx->trellis)
776         s->dct_quantize  = dct_quantize_trellis_c;
777
778     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
779         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
780
781     s->quant_precision = 5;
782
783     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
784     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
785
786     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
787         ff_h261_encode_init(s);
788     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
789         h263_encode_init(s);
790     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
791         ff_msmpeg4_encode_init(s);
792     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
793         && s->out_format == FMT_MPEG1)
794         ff_mpeg1_encode_init(s);
795
796     /* init q matrix */
797     for (i = 0; i < 64; i++) {
798         int j = s->dsp.idct_permutation[i];
799         if (CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4 &&
800             s->mpeg_quant) {
801             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
802             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
803         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
804             s->intra_matrix[j] =
805             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
806         } else {
807             /* mpeg1/2 */
808             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
809             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
810         }
811         if (s->avctx->intra_matrix)
812             s->intra_matrix[j] = s->avctx->intra_matrix[i];
813         if (s->avctx->inter_matrix)
814             s->inter_matrix[j] = s->avctx->inter_matrix[i];
815     }
816
817     /* precompute matrix */
818     /* for mjpeg, we do include qscale in the matrix */
819     if (s->out_format != FMT_MJPEG) {
820         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
821                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
822                           31, 1);
823         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
824                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
825                           31, 0);
826     }
827
828     if (ff_rate_control_init(s) < 0)
829         return -1;
830
831     return 0;
832 }
833
834 av_cold int MPV_encode_end(AVCodecContext *avctx)
835 {
836     MpegEncContext *s = avctx->priv_data;
837
838     ff_rate_control_uninit(s);
839
840     MPV_common_end(s);
841     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
842         s->out_format == FMT_MJPEG)
843         ff_mjpeg_encode_close(s);
844
845     av_freep(&avctx->extradata);
846
847     return 0;
848 }
849
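/**
 * Descriptive note: get_sae() returns the sum of absolute differences
 * between a 16x16 block and a constant value (its mean). get_intra_count()
 * below uses it to count how many macroblocks look cheaper to code as intra
 * (small deviation from their own mean) than as inter (SAD against the
 * reference frame); the count feeds the b_frame_strategy == 1 heuristic in
 * select_input_picture().
 */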
850 static int get_sae(uint8_t *src, int ref, int stride)
851 {
852     int x,y;
853     int acc = 0;
854
855     for (y = 0; y < 16; y++) {
856         for (x = 0; x < 16; x++) {
857             acc += FFABS(src[x + y * stride] - ref);
858         }
859     }
860
861     return acc;
862 }
863
864 static int get_intra_count(MpegEncContext *s, uint8_t *src,
865                            uint8_t *ref, int stride)
866 {
867     int x, y, w, h;
868     int acc = 0;
869
870     w = s->width  & ~15;
871     h = s->height & ~15;
872
873     for (y = 0; y < h; y += 16) {
874         for (x = 0; x < w; x += 16) {
875             int offset = x + y * stride;
876             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
877                                      16);
878             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
879             int sae  = get_sae(src + offset, mean, stride);
880
881             acc += sae + 500 < sad;
882         }
883     }
884     return acc;
885 }
886
887
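/**
 * Descriptive note: queue a user frame for encoding. The frame enters
 * s->input_picture[] with a delay of max_b_frames entries; its buffer is
 * referenced directly when the strides match and CODEC_FLAG_INPUT_PRESERVED
 * permits it, otherwise the data is copied into an internal picture. Missing
 * timestamps are guessed from the previous one, and non-monotone timestamps
 * are rejected.
 */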
888 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
889 {
890     AVFrame *pic = NULL;
891     int64_t pts;
892     int i;
893     const int encoding_delay = s->max_b_frames;
894     int direct = 1;
895
896     if (pic_arg) {
897         pts = pic_arg->pts;
898         pic_arg->display_picture_number = s->input_picture_number++;
899
900         if (pts != AV_NOPTS_VALUE) {
901             if (s->user_specified_pts != AV_NOPTS_VALUE) {
902                 int64_t time = pts;
903                 int64_t last = s->user_specified_pts;
904
905                 if (time <= last) {
906                     av_log(s->avctx, AV_LOG_ERROR,
907                            "Error, Invalid timestamp=%"PRId64", "
908                            "last=%"PRId64"\n", pts, s->user_specified_pts);
909                     return -1;
910                 }
911             }
912             s->user_specified_pts = pts;
913         } else {
914             if (s->user_specified_pts != AV_NOPTS_VALUE) {
915                 s->user_specified_pts =
916                 pts = s->user_specified_pts + 1;
917                 av_log(s->avctx, AV_LOG_INFO,
918                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
919                        pts);
920             } else {
921                 pts = pic_arg->display_picture_number;
922             }
923         }
924     }
925
926   if (pic_arg) {
927     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
928         direct = 0;
929     if (pic_arg->linesize[0] != s->linesize)
930         direct = 0;
931     if (pic_arg->linesize[1] != s->uvlinesize)
932         direct = 0;
933     if (pic_arg->linesize[2] != s->uvlinesize)
934         direct = 0;
935
936     //av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0],
937     //       pic_arg->linesize[1], s->linesize, s->uvlinesize);
938
939     if (direct) {
940         i = ff_find_unused_picture(s, 1);
941         if (i < 0)
942             return i;
943
944         pic = (AVFrame *) &s->picture[i];
945         pic->reference = 3;
946
947         for (i = 0; i < 4; i++) {
948             pic->data[i]     = pic_arg->data[i];
949             pic->linesize[i] = pic_arg->linesize[i];
950         }
951         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
952             return -1;
953         }
954     } else {
955         i = ff_find_unused_picture(s, 0);
956         if (i < 0)
957             return i;
958
959         pic = (AVFrame *) &s->picture[i];
960         pic->reference = 3;
961
962         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
963             return -1;
964         }
965
966         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
967             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
968             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
969             // empty
970         } else {
971             int h_chroma_shift, v_chroma_shift;
972             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
973                                           &v_chroma_shift);
974
975             for (i = 0; i < 3; i++) {
976                 int src_stride = pic_arg->linesize[i];
977                 int dst_stride = i ? s->uvlinesize : s->linesize;
978                 int h_shift = i ? h_chroma_shift : 0;
979                 int v_shift = i ? v_chroma_shift : 0;
980                 int w = s->width  >> h_shift;
981                 int h = s->height >> v_shift;
982                 uint8_t *src = pic_arg->data[i];
983                 uint8_t *dst = pic->data[i];
984
985                 if (!s->avctx->rc_buffer_size)
986                     dst += INPLACE_OFFSET;
987
988                 if (src_stride == dst_stride)
989                     memcpy(dst, src, src_stride * h);
990                 else {
991                     while (h--) {
992                         memcpy(dst, src, w);
993                         dst += dst_stride;
994                         src += src_stride;
995                     }
996                 }
997             }
998         }
999     }
1000     copy_picture_attributes(s, pic, pic_arg);
1001     pic->pts = pts; // we set this here to avoid modifying pic_arg
1002   }
1003
1004     /* shift buffer entries */
1005     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1006         s->input_picture[i - 1] = s->input_picture[i];
1007
1008     s->input_picture[encoding_delay] = (Picture*) pic;
1009
1010     return 0;
1011 }
1012
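/**
 * Descriptive note: decide whether the candidate frame p may be skipped.
 * It is compared against the reference per 8x8 block with frame_skip_cmp,
 * the block scores are combined according to frame_skip_exp (max, sum of
 * absolute values, sum of squares, ...), and a skip is reported when the
 * total stays below frame_skip_threshold or below frame_skip_factor scaled
 * by the current lambda.
 */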
1013 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1014 {
1015     int x, y, plane;
1016     int score = 0;
1017     int64_t score64 = 0;
1018
1019     for (plane = 0; plane < 3; plane++) {
1020         const int stride = p->f.linesize[plane];
1021         const int bw = plane ? 1 : 2;
1022         for (y = 0; y < s->mb_height * bw; y++) {
1023             for (x = 0; x < s->mb_width * bw; x++) {
1024                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1025                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1026                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1027                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1028
1029                 switch (s->avctx->frame_skip_exp) {
1030                 case 0: score    =  FFMAX(score, v);          break;
1031                 case 1: score   += FFABS(v);                  break;
1032                 case 2: score   += v * v;                     break;
1033                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1034                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1035                 }
1036             }
1037         }
1038     }
1039
1040     if (score)
1041         score64 = score;
1042
1043     if (score64 < s->avctx->frame_skip_threshold)
1044         return 1;
1045     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1046         return 1;
1047     return 0;
1048 }
1049
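/**
 * Descriptive note: b_frame_strategy == 2. Encode the queued pictures with a
 * scratch encoder at a resolution reduced by brd_scale, once for each
 * candidate number of consecutive B-frames, accumulate an approximate
 * rate-distortion cost from the produced bits (weighted by lambda2) and the
 * reported SSE, and return the B-frame count with the lowest cost.
 */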
1050 static int estimate_best_b_count(MpegEncContext *s)
1051 {
1052     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1053     AVCodecContext *c = avcodec_alloc_context3(NULL);
1054     AVFrame input[FF_MAX_B_FRAMES + 2];
1055     const int scale = s->avctx->brd_scale;
1056     int i, j, out_size, p_lambda, b_lambda, lambda2;
1057     int outbuf_size  = s->width * s->height; // FIXME
1058     uint8_t *outbuf  = av_malloc(outbuf_size);
1059     int64_t best_rd  = INT64_MAX;
1060     int best_b_count = -1;
1061
1062     assert(scale >= 0 && scale <= 3);
1063
1064     //emms_c();
1065     //s->next_picture_ptr->quality;
1066     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1067     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1068     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1069     if (!b_lambda) // FIXME we should do this somewhere else
1070         b_lambda = p_lambda;
1071     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1072                FF_LAMBDA_SHIFT;
1073
1074     c->width        = s->width  >> scale;
1075     c->height       = s->height >> scale;
1076     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1077                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1078     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1079     c->mb_decision  = s->avctx->mb_decision;
1080     c->me_cmp       = s->avctx->me_cmp;
1081     c->mb_cmp       = s->avctx->mb_cmp;
1082     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1083     c->pix_fmt      = PIX_FMT_YUV420P;
1084     c->time_base    = s->avctx->time_base;
1085     c->max_b_frames = s->max_b_frames;
1086
1087     if (avcodec_open2(c, codec, NULL) < 0)
1088         return -1;
1089
1090     for (i = 0; i < s->max_b_frames + 2; i++) {
1091         int ysize = c->width * c->height;
1092         int csize = (c->width / 2) * (c->height / 2);
1093         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1094                                                 s->next_picture_ptr;
1095
1096         avcodec_get_frame_defaults(&input[i]);
1097         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1098         input[i].data[1]     = input[i].data[0] + ysize;
1099         input[i].data[2]     = input[i].data[1] + csize;
1100         input[i].linesize[0] = c->width;
1101         input[i].linesize[1] =
1102         input[i].linesize[2] = c->width / 2;
1103
1104         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1105             pre_input = *pre_input_ptr;
1106
1107             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1108                 pre_input.f.data[0] += INPLACE_OFFSET;
1109                 pre_input.f.data[1] += INPLACE_OFFSET;
1110                 pre_input.f.data[2] += INPLACE_OFFSET;
1111             }
1112
1113             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1114                                  pre_input.f.data[0], pre_input.f.linesize[0],
1115                                  c->width,      c->height);
1116             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1117                                  pre_input.f.data[1], pre_input.f.linesize[1],
1118                                  c->width >> 1, c->height >> 1);
1119             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1120                                  pre_input.f.data[2], pre_input.f.linesize[2],
1121                                  c->width >> 1, c->height >> 1);
1122         }
1123     }
1124
1125     for (j = 0; j < s->max_b_frames + 1; j++) {
1126         int64_t rd = 0;
1127
1128         if (!s->input_picture[j])
1129             break;
1130
1131         c->error[0] = c->error[1] = c->error[2] = 0;
1132
1133         input[0].pict_type = AV_PICTURE_TYPE_I;
1134         input[0].quality   = 1 * FF_QP2LAMBDA;
1135         out_size           = avcodec_encode_video(c, outbuf,
1136                                                   outbuf_size, &input[0]);
1137         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1138
1139         for (i = 0; i < s->max_b_frames + 1; i++) {
1140             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1141
1142             input[i + 1].pict_type = is_p ?
1143                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1144             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1145             out_size = avcodec_encode_video(c, outbuf, outbuf_size,
1146                                             &input[i + 1]);
1147             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1148         }
1149
1150         /* get the delayed frames */
1151         while (out_size) {
1152             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
1153             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1154         }
1155
1156         rd += c->error[0] + c->error[1] + c->error[2];
1157
1158         if (rd < best_rd) {
1159             best_rd = rd;
1160             best_b_count = j;
1161         }
1162     }
1163
1164     av_freep(&outbuf);
1165     avcodec_close(c);
1166     av_freep(&c);
1167
1168     for (i = 0; i < s->max_b_frames + 2; i++) {
1169         av_freep(&input[i].data[0]);
1170     }
1171
1172     return best_b_count;
1173 }
1174
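/**
 * Descriptive note: choose the next picture to code. Emit an I-frame when
 * there is no reference yet or in intra-only mode, otherwise decide how many
 * queued pictures become B-frames (fixed count, intra-count heuristic or
 * estimate_best_b_count() depending on b_frame_strategy), honour the
 * closed/strict GOP flags, and fill reordered_input_picture[] in coded
 * order. The selected picture becomes s->new_picture; shared input buffers
 * are copied to an internal picture so they are never modified in place.
 */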
1175 static int select_input_picture(MpegEncContext *s)
1176 {
1177     int i;
1178
1179     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1180         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1181     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1182
1183     /* set next picture type & ordering */
1184     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1185         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1186             s->next_picture_ptr == NULL || s->intra_only) {
1187             s->reordered_input_picture[0] = s->input_picture[0];
1188             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1189             s->reordered_input_picture[0]->f.coded_picture_number =
1190                 s->coded_picture_number++;
1191         } else {
1192             int b_frames;
1193
1194             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1195                 if (s->picture_in_gop_number < s->gop_size &&
1196                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1197                     // FIXME check that the gop check above is +-1 correct
1198                     //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n",
1199                     //       s->input_picture[0]->f.data[0],
1200                     //       s->input_picture[0]->pts);
1201
1202                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1203                         for (i = 0; i < 4; i++)
1204                             s->input_picture[0]->f.data[i] = NULL;
1205                         s->input_picture[0]->f.type = 0;
1206                     } else {
1207                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1208                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1209
1210                         s->avctx->release_buffer(s->avctx,
1211                                                  (AVFrame *) s->input_picture[0]);
1212                     }
1213
1214                     emms_c();
1215                     ff_vbv_update(s, 0);
1216
1217                     goto no_output_pic;
1218                 }
1219             }
1220
1221             if (s->flags & CODEC_FLAG_PASS2) {
1222                 for (i = 0; i < s->max_b_frames + 1; i++) {
1223                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1224
1225                     if (pict_num >= s->rc_context.num_entries)
1226                         break;
1227                     if (!s->input_picture[i]) {
1228                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1229                         break;
1230                     }
1231
1232                     s->input_picture[i]->f.pict_type =
1233                         s->rc_context.entry[pict_num].new_pict_type;
1234                 }
1235             }
1236
1237             if (s->avctx->b_frame_strategy == 0) {
1238                 b_frames = s->max_b_frames;
1239                 while (b_frames && !s->input_picture[b_frames])
1240                     b_frames--;
1241             } else if (s->avctx->b_frame_strategy == 1) {
1242                 for (i = 1; i < s->max_b_frames + 1; i++) {
1243                     if (s->input_picture[i] &&
1244                         s->input_picture[i]->b_frame_score == 0) {
1245                         s->input_picture[i]->b_frame_score =
1246                             get_intra_count(s,
1247                                             s->input_picture[i    ]->f.data[0],
1248                                             s->input_picture[i - 1]->f.data[0],
1249                                             s->linesize) + 1;
1250                     }
1251                 }
1252                 for (i = 0; i < s->max_b_frames + 1; i++) {
1253                     if (s->input_picture[i] == NULL ||
1254                         s->input_picture[i]->b_frame_score - 1 >
1255                             s->mb_num / s->avctx->b_sensitivity)
1256                         break;
1257                 }
1258
1259                 b_frames = FFMAX(0, i - 1);
1260
1261                 /* reset scores */
1262                 for (i = 0; i < b_frames + 1; i++) {
1263                     s->input_picture[i]->b_frame_score = 0;
1264                 }
1265             } else if (s->avctx->b_frame_strategy == 2) {
1266                 b_frames = estimate_best_b_count(s);
1267             } else {
1268                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1269                 b_frames = 0;
1270             }
1271
1272             emms_c();
1273             //static int b_count = 0;
1274             //b_count += b_frames;
1275             //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1276
1277             for (i = b_frames - 1; i >= 0; i--) {
1278                 int type = s->input_picture[i]->f.pict_type;
1279                 if (type && type != AV_PICTURE_TYPE_B)
1280                     b_frames = i;
1281             }
1282             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1283                 b_frames == s->max_b_frames) {
1284                 av_log(s->avctx, AV_LOG_ERROR,
1285                        "warning, too many b frames in a row\n");
1286             }
1287
1288             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1289                 if ((s->flags2 & CODEC_FLAG2_STRICT_GOP) &&
1290                     s->gop_size > s->picture_in_gop_number) {
1291                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1292                 } else {
1293                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1294                         b_frames = 0;
1295                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1296                 }
1297             }
1298
1299             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1300                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1301                 b_frames--;
1302
1303             s->reordered_input_picture[0] = s->input_picture[b_frames];
1304             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1305                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1306             s->reordered_input_picture[0]->f.coded_picture_number =
1307                 s->coded_picture_number++;
1308             for (i = 0; i < b_frames; i++) {
1309                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1310                 s->reordered_input_picture[i + 1]->f.pict_type =
1311                     AV_PICTURE_TYPE_B;
1312                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1313                     s->coded_picture_number++;
1314             }
1315         }
1316     }
1317 no_output_pic:
1318     if (s->reordered_input_picture[0]) {
1319         s->reordered_input_picture[0]->f.reference =
1320            s->reordered_input_picture[0]->f.pict_type !=
1321                AV_PICTURE_TYPE_B ? 3 : 0;
1322
1323         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1324
1325         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1326             s->avctx->rc_buffer_size) {
1327             // input is a shared pix, so we can't modify it -> alloc a new
1328             // one & ensure that the shared one is reusable
1329
1330             Picture *pic;
1331             int i = ff_find_unused_picture(s, 0);
1332             if (i < 0)
1333                 return i;
1334             pic = &s->picture[i];
1335
1336             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1337             if (ff_alloc_picture(s, pic, 0) < 0) {
1338                 return -1;
1339             }
1340
1341             /* mark us unused / free shared pic */
1342             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1343                 s->avctx->release_buffer(s->avctx,
1344                                          (AVFrame *) s->reordered_input_picture[0]);
1345             for (i = 0; i < 4; i++)
1346                 s->reordered_input_picture[0]->f.data[i] = NULL;
1347             s->reordered_input_picture[0]->f.type = 0;
1348
1349             copy_picture_attributes(s, (AVFrame *) pic,
1350                                     (AVFrame *) s->reordered_input_picture[0]);
1351
1352             s->current_picture_ptr = pic;
1353         } else {
1354             // input is not a shared pix -> reuse buffer for current_pix
1355
1356             assert(s->reordered_input_picture[0]->f.type ==
1357                        FF_BUFFER_TYPE_USER ||
1358                    s->reordered_input_picture[0]->f.type ==
1359                        FF_BUFFER_TYPE_INTERNAL);
1360
1361             s->current_picture_ptr = s->reordered_input_picture[0];
1362             for (i = 0; i < 4; i++) {
1363                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1364             }
1365         }
1366         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1367
1368         s->picture_number = s->new_picture.f.display_picture_number;
1369         //printf("dpn:%d\n", s->picture_number);
1370     } else {
1371         memset(&s->new_picture, 0, sizeof(Picture));
1372     }
1373     return 0;
1374 }
1375
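/**
 * Descriptive note: encode one frame. Split the output buffer between the
 * slice contexts, queue the new input picture, select the picture to code,
 * run encode_picture(), and afterwards handle first-pass statistics, VBV
 * overflow retries and stuffing/padding of the bitstream.
 */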
1376 int MPV_encode_picture(AVCodecContext *avctx,
1377                        unsigned char *buf, int buf_size, void *data)
1378 {
1379     MpegEncContext *s = avctx->priv_data;
1380     AVFrame *pic_arg  = data;
1381     int i, stuffing_count;
1382     int context_count = s->slice_context_count;
1383
1384     for (i = 0; i < context_count; i++) {
1385         int start_y = s->thread_context[i]->start_mb_y;
1386         int   end_y = s->thread_context[i]->  end_mb_y;
1387         int h       = s->mb_height;
1388         uint8_t *start = buf + (size_t)(((int64_t) buf_size) * start_y / h);
1389         uint8_t *end   = buf + (size_t)(((int64_t) buf_size) *   end_y / h);
1390
1391         init_put_bits(&s->thread_context[i]->pb, start, end - start);
1392     }
1393
1394     s->picture_in_gop_number++;
1395
1396     if (load_input_picture(s, pic_arg) < 0)
1397         return -1;
1398
1399     if (select_input_picture(s) < 0) {
1400         return -1;
1401     }
1402
1403     /* output? */
1404     if (s->new_picture.f.data[0]) {
1405         s->pict_type = s->new_picture.f.pict_type;
1406         //emms_c();
1407         //printf("qs:%f %f %d\n", s->new_picture.quality,
1408         //       s->current_picture.quality, s->qscale);
1409         MPV_frame_start(s, avctx);
1410 vbv_retry:
1411         if (encode_picture(s, s->picture_number) < 0)
1412             return -1;
1413
1414         avctx->header_bits = s->header_bits;
1415         avctx->mv_bits     = s->mv_bits;
1416         avctx->misc_bits   = s->misc_bits;
1417         avctx->i_tex_bits  = s->i_tex_bits;
1418         avctx->p_tex_bits  = s->p_tex_bits;
1419         avctx->i_count     = s->i_count;
1420         // FIXME f/b_count in avctx
1421         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1422         avctx->skip_count  = s->skip_count;
1423
1424         MPV_frame_end(s);
1425
1426         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1427             ff_mjpeg_encode_picture_trailer(s);
1428
1429         if (avctx->rc_buffer_size) {
1430             RateControlContext *rcc = &s->rc_context;
1431             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1432
1433             if (put_bits_count(&s->pb) > max_size &&
1434                 s->lambda < s->avctx->lmax) {
1435                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1436                                        (s->qscale + 1) / s->qscale);
1437                 if (s->adaptive_quant) {
1438                     int i;
1439                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1440                         s->lambda_table[i] =
1441                             FFMAX(s->lambda_table[i] + 1,
1442                                   s->lambda_table[i] * (s->qscale + 1) /
1443                                   s->qscale);
1444                 }
1445                 s->mb_skipped = 0;        // done in MPV_frame_start()
1446                 // the no_rounding flip below was done in encode_picture(), so undo it here
1447                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1448                     if (s->flipflop_rounding          ||
1449                         s->codec_id == CODEC_ID_H263P ||
1450                         s->codec_id == CODEC_ID_MPEG4)
1451                         s->no_rounding ^= 1;
1452                 }
1453                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1454                     s->time_base       = s->last_time_base;
1455                     s->last_non_b_time = s->time - s->pp_time;
1456                 }
1457                 //av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
1458                 for (i = 0; i < context_count; i++) {
1459                     PutBitContext *pb = &s->thread_context[i]->pb;
1460                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1461                 }
1462                 goto vbv_retry;
1463             }
1464
1465             assert(s->avctx->rc_max_rate);
1466         }
1467
1468         if (s->flags & CODEC_FLAG_PASS1)
1469             ff_write_pass1_stats(s);
1470
1471         for (i = 0; i < 4; i++) {
1472             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1473             avctx->error[i] += s->current_picture_ptr->f.error[i];
1474         }
1475
1476         if (s->flags & CODEC_FLAG_PASS1)
1477             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1478                    avctx->i_tex_bits + avctx->p_tex_bits ==
1479                        put_bits_count(&s->pb));
1480         flush_put_bits(&s->pb);
1481         s->frame_bits  = put_bits_count(&s->pb);
1482
1483         stuffing_count = ff_vbv_update(s, s->frame_bits);
1484         if (stuffing_count) {
1485             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1486                     stuffing_count + 50) {
1487                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1488                 return -1;
1489             }
1490
1491             switch (s->codec_id) {
1492             case CODEC_ID_MPEG1VIDEO:
1493             case CODEC_ID_MPEG2VIDEO:
1494                 while (stuffing_count--) {
1495                     put_bits(&s->pb, 8, 0);
1496                 }
1497             break;
1498             case CODEC_ID_MPEG4:
1499                 put_bits(&s->pb, 16, 0);
1500                 put_bits(&s->pb, 16, 0x1C3);
1501                 stuffing_count -= 4;
1502                 while (stuffing_count--) {
1503                     put_bits(&s->pb, 8, 0xFF);
1504                 }
1505             break;
1506             default:
1507                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1508             }
1509             flush_put_bits(&s->pb);
1510             s->frame_bits  = put_bits_count(&s->pb);
1511         }
1512
1513         /* update mpeg1/2 vbv_delay for CBR */
1514         if (s->avctx->rc_max_rate                          &&
1515             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1516             s->out_format == FMT_MPEG1                     &&
1517             90000LL * (avctx->rc_buffer_size - 1) <=
1518                 s->avctx->rc_max_rate * 0xFFFFLL) {
1519             int vbv_delay, min_delay;
1520             double inbits  = s->avctx->rc_max_rate *
1521                              av_q2d(s->avctx->time_base);
1522             int    minbits = s->frame_bits - 8 *
1523                              (s->vbv_delay_ptr - s->pb.buf - 1);
1524             double bits    = s->rc_context.buffer_index + minbits - inbits;
1525
1526             if (bits < 0)
1527                 av_log(s->avctx, AV_LOG_ERROR,
1528                        "Internal error, negative bits\n");
1529
1530             assert(s->repeat_first_field == 0);
1531
1532             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1533             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1534                         s->avctx->rc_max_rate;
1535
1536             vbv_delay = FFMAX(vbv_delay, min_delay);
1537
1538             assert(vbv_delay < 0xFFFF);
1539
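                 /* Patch the 16-bit vbv_delay field in the already written
                  * picture header; it straddles 3 bytes at vbv_delay_ptr:
                  * top 3 bits -> low bits of byte 0, middle 8 bits -> byte 1,
                  * low 5 bits -> high bits of byte 2. */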
1540             s->vbv_delay_ptr[0] &= 0xF8;
1541             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1542             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1543             s->vbv_delay_ptr[2] &= 0x07;
1544             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1545             avctx->vbv_delay     = vbv_delay * 300;
1546         }
1547         s->total_bits     += s->frame_bits;
1548         avctx->frame_bits  = s->frame_bits;
1549     } else {
1550         assert((put_bits_ptr(&s->pb) == s->pb.buf));
1551         s->frame_bits = 0;
1552     }
1553     assert((s->frame_bits & 7) == 0);
1554
1555     return s->frame_bits / 8;
1556 }
1557
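     /* Zero an entire block if it contains only a few scattered +-1
      * coefficients: the score from tab[], indexed by the zero run preceding
      * each +-1, roughly estimates the coding cost; if it stays below the
      * threshold the block is cleared. Any coefficient with |level| > 1
      * disables the elimination, and a negative threshold allows the DC
      * coefficient to be eliminated as well. */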
1558 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1559                                                 int n, int threshold)
1560 {
1561     static const char tab[64] = {
1562         3, 2, 2, 1, 1, 1, 1, 1,
1563         1, 1, 1, 1, 1, 1, 1, 1,
1564         1, 1, 1, 1, 1, 1, 1, 1,
1565         0, 0, 0, 0, 0, 0, 0, 0,
1566         0, 0, 0, 0, 0, 0, 0, 0,
1567         0, 0, 0, 0, 0, 0, 0, 0,
1568         0, 0, 0, 0, 0, 0, 0, 0,
1569         0, 0, 0, 0, 0, 0, 0, 0
1570     };
1571     int score = 0;
1572     int run = 0;
1573     int i;
1574     DCTELEM *block = s->block[n];
1575     const int last_index = s->block_last_index[n];
1576     int skip_dc;
1577
1578     if (threshold < 0) {
1579         skip_dc = 0;
1580         threshold = -threshold;
1581     } else
1582         skip_dc = 1;
1583
1584     /* Are all the coefficients we could set to zero already zero? */
1585     if (last_index <= skip_dc - 1)
1586         return;
1587
1588     for (i = 0; i <= last_index; i++) {
1589         const int j = s->intra_scantable.permutated[i];
1590         const int level = FFABS(block[j]);
1591         if (level == 1) {
1592             if (skip_dc && i == 0)
1593                 continue;
1594             score += tab[run];
1595             run = 0;
1596         } else if (level > 1) {
1597             return;
1598         } else {
1599             run++;
1600         }
1601     }
1602     if (score >= threshold)
1603         return;
1604     for (i = skip_dc; i <= last_index; i++) {
1605         const int j = s->intra_scantable.permutated[i];
1606         block[j] = 0;
1607     }
1608     if (block[0])
1609         s->block_last_index[n] = 0;
1610     else
1611         s->block_last_index[n] = -1;
1612 }
1613
1614 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1615                                int last_index)
1616 {
1617     int i;
1618     const int maxlevel = s->max_qcoeff;
1619     const int minlevel = s->min_qcoeff;
1620     int overflow = 0;
1621
1622     if (s->mb_intra) {
1623         i = 1; // skip clipping of intra dc
1624     } else
1625         i = 0;
1626
1627     for (; i <= last_index; i++) {
1628         const int j = s->intra_scantable.permutated[i];
1629         int level = block[j];
1630
1631         if (level > maxlevel) {
1632             level = maxlevel;
1633             overflow++;
1634         } else if (level < minlevel) {
1635             level = minlevel;
1636             overflow++;
1637         }
1638
1639         block[j] = level;
1640     }
1641
1642     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1643         av_log(s->avctx, AV_LOG_INFO,
1644                "warning, clipping %d dct coefficients to %d..%d\n",
1645                overflow, minlevel, maxlevel);
1646 }
1647
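     /* Per-pixel (8x8) activity weights for quantizer noise shaping: each
      * entry is 36 times the (integer) standard deviation of the pixel's 3x3
      * neighbourhood, i.e. 36 * sqrt(count*sqr - sum*sum) / count. The
      * weights are consumed by dct_quantize_refine(). */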
1648 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
1649 {
1650     int x, y;
1651     // FIXME optimize
1652     for (y = 0; y < 8; y++) {
1653         for (x = 0; x < 8; x++) {
1654             int x2, y2;
1655             int sum = 0;
1656             int sqr = 0;
1657             int count = 0;
1658
1659             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
1660                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
1661                     int v = ptr[x2 + y2 * stride];
1662                     sum += v;
1663                     sqr += v * v;
1664                     count++;
1665                 }
1666             }
1667             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
1668         }
1669     }
1670 }
1671
1672 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1673                                                 int motion_x, int motion_y,
1674                                                 int mb_block_height,
1675                                                 int mb_block_count)
1676 {
1677     int16_t weight[8][64];
1678     DCTELEM orig[8][64];
1679     const int mb_x = s->mb_x;
1680     const int mb_y = s->mb_y;
1681     int i;
1682     int skip_dct[8];
1683     int dct_offset = s->linesize * 8; // default for progressive frames
1684     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1685     int wrap_y, wrap_c;
1686
1687     for (i = 0; i < mb_block_count; i++)
1688         skip_dct[i] = s->skipdct;
1689
1690     if (s->adaptive_quant) {
1691         const int last_qp = s->qscale;
1692         const int mb_xy = mb_x + mb_y * s->mb_stride;
1693
1694         s->lambda = s->lambda_table[mb_xy];
1695         update_qscale(s);
1696
1697         if (!(s->flags & CODEC_FLAG_QP_RD)) {
1698             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1699             s->dquant = s->qscale - last_qp;
1700
1701             if (s->out_format == FMT_H263) {
1702                 s->dquant = av_clip(s->dquant, -2, 2);
1703
1704                 if (s->codec_id == CODEC_ID_MPEG4) {
1705                     if (!s->mb_intra) {
1706                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1707                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1708                                 s->dquant = 0;
1709                         }
1710                         if (s->mv_type == MV_TYPE_8X8)
1711                             s->dquant = 0;
1712                     }
1713                 }
1714             }
1715         }
1716         ff_set_qscale(s, last_qp + s->dquant);
1717     } else if (s->flags & CODEC_FLAG_QP_RD)
1718         ff_set_qscale(s, s->qscale + s->dquant);
1719
1720     wrap_y = s->linesize;
1721     wrap_c = s->uvlinesize;
1722     ptr_y  = s->new_picture.f.data[0] +
1723              (mb_y * 16 * wrap_y)              + mb_x * 16;
1724     ptr_cb = s->new_picture.f.data[1] +
1725              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1726     ptr_cr = s->new_picture.f.data[2] +
1727              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1728
1729     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1730         uint8_t *ebuf = s->edge_emu_buffer + 32;
1731         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1732                                 mb_y * 16, s->width, s->height);
1733         ptr_y = ebuf;
1734         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1735                                 mb_block_height, mb_x * 8, mb_y * 8,
1736                                 s->width >> 1, s->height >> 1);
1737         ptr_cb = ebuf + 18 * wrap_y;
1738         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1739                                 mb_block_height, mb_x * 8, mb_y * 8,
1740                                 s->width >> 1, s->height >> 1);
1741         ptr_cr = ebuf + 18 * wrap_y + 8;
1742     }
1743
1744     if (s->mb_intra) {
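             /* Interlaced DCT decision: compare the ildct cost of the two
              * 16x8 luma halves in frame order against field order (stride
              * doubled); the -400 bias favours frame (progressive) DCT. If
              * field order wins, switch to field DCT by doubling wrap_y (and
              * wrap_c for 4:2:2) and pointing dct_offset at the second field. */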
1745         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1746             int progressive_score, interlaced_score;
1747
1748             s->interlaced_dct = 0;
1749             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1750                                                     NULL, wrap_y, 8) +
1751                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1752                                                     NULL, wrap_y, 8) - 400;
1753
1754             if (progressive_score > 0) {
1755                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1756                                                        NULL, wrap_y * 2, 8) +
1757                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1758                                                        NULL, wrap_y * 2, 8);
1759                 if (progressive_score > interlaced_score) {
1760                     s->interlaced_dct = 1;
1761
1762                     dct_offset = wrap_y;
1763                     wrap_y <<= 1;
1764                     if (s->chroma_format == CHROMA_422)
1765                         wrap_c <<= 1;
1766                 }
1767             }
1768         }
1769
1770         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1771         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1772         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1773         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1774
1775         if (s->flags & CODEC_FLAG_GRAY) {
1776             skip_dct[4] = 1;
1777             skip_dct[5] = 1;
1778         } else {
1779             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1780             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1781             if (!s->chroma_y_shift) { /* 422 */
1782                 s->dsp.get_pixels(s->block[6],
1783                                   ptr_cb + (dct_offset >> 1), wrap_c);
1784                 s->dsp.get_pixels(s->block[7],
1785                                   ptr_cr + (dct_offset >> 1), wrap_c);
1786             }
1787         }
1788     } else {
1789         op_pixels_func (*op_pix)[4];
1790         qpel_mc_func (*op_qpix)[16];
1791         uint8_t *dest_y, *dest_cb, *dest_cr;
1792
1793         dest_y  = s->dest[0];
1794         dest_cb = s->dest[1];
1795         dest_cr = s->dest[2];
1796
1797         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1798             op_pix  = s->dsp.put_pixels_tab;
1799             op_qpix = s->dsp.put_qpel_pixels_tab;
1800         } else {
1801             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1802             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1803         }
1804
1805         if (s->mv_dir & MV_DIR_FORWARD) {
1806             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f.data,
1807                        op_pix, op_qpix);
1808             op_pix  = s->dsp.avg_pixels_tab;
1809             op_qpix = s->dsp.avg_qpel_pixels_tab;
1810         }
1811         if (s->mv_dir & MV_DIR_BACKWARD) {
1812             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f.data,
1813                        op_pix, op_qpix);
1814         }
1815
1816         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1817             int progressive_score, interlaced_score;
1818
1819             s->interlaced_dct = 0;
1820             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1821                                                     ptr_y,              wrap_y,
1822                                                     8) +
1823                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1824                                                     ptr_y + wrap_y * 8, wrap_y,
1825                                                     8) - 400;
1826
1827             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1828                 progressive_score -= 400;
1829
1830             if (progressive_score > 0) {
1831                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1832                                                        ptr_y,
1833                                                        wrap_y * 2, 8) +
1834                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1835                                                        ptr_y + wrap_y,
1836                                                        wrap_y * 2, 8);
1837
1838                 if (progressive_score > interlaced_score) {
1839                     s->interlaced_dct = 1;
1840
1841                     dct_offset = wrap_y;
1842                     wrap_y <<= 1;
1843                     if (s->chroma_format == CHROMA_422)
1844                         wrap_c <<= 1;
1845                 }
1846             }
1847         }
1848
1849         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1850         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1851         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1852                            dest_y + dct_offset, wrap_y);
1853         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1854                            dest_y + dct_offset + 8, wrap_y);
1855
1856         if (s->flags & CODEC_FLAG_GRAY) {
1857             skip_dct[4] = 1;
1858             skip_dct[5] = 1;
1859         } else {
1860             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1861             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1862             if (!s->chroma_y_shift) { /* 422 */
1863                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1864                                    dest_cb + (dct_offset >> 1), wrap_c);
1865                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1866                                    dest_cr + (dct_offset >> 1), wrap_c);
1867             }
1868         }
1869         /* pre quantization */
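         /* If the motion-compensated variance of this MB is low, skip the DCT
          * for any 8x8 block whose SAD against the prediction is below
          * 20 * qscale; such blocks are assumed cheap enough to drop entirely
          * (they are then coded as having no coefficients). */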
1870         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1871                 2 * s->qscale * s->qscale) {
1872             // FIXME optimize
1873             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1874                               wrap_y, 8) < 20 * s->qscale)
1875                 skip_dct[0] = 1;
1876             if (s->dsp.sad[1](NULL, ptr_y + 8,
1877                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1878                 skip_dct[1] = 1;
1879             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1880                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1881                 skip_dct[2] = 1;
1882             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1883                               dest_y + dct_offset + 8,
1884                               wrap_y, 8) < 20 * s->qscale)
1885                 skip_dct[3] = 1;
1886             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1887                               wrap_c, 8) < 20 * s->qscale)
1888                 skip_dct[4] = 1;
1889             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1890                               wrap_c, 8) < 20 * s->qscale)
1891                 skip_dct[5] = 1;
1892             if (!s->chroma_y_shift) { /* 422 */
1893                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1894                                   dest_cb + (dct_offset >> 1),
1895                                   wrap_c, 8) < 20 * s->qscale)
1896                     skip_dct[6] = 1;
1897                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1898                                   dest_cr + (dct_offset >> 1),
1899                                   wrap_c, 8) < 20 * s->qscale)
1900                     skip_dct[7] = 1;
1901             }
1902         }
1903     }
1904
1905     if (s->avctx->quantizer_noise_shaping) {
1906         if (!skip_dct[0])
1907             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1908         if (!skip_dct[1])
1909             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1910         if (!skip_dct[2])
1911             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1912         if (!skip_dct[3])
1913             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1914         if (!skip_dct[4])
1915             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1916         if (!skip_dct[5])
1917             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1918         if (!s->chroma_y_shift) { /* 422 */
1919             if (!skip_dct[6])
1920                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1921                                   wrap_c);
1922             if (!skip_dct[7])
1923                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1924                                   wrap_c);
1925         }
1926         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
1927     }
1928
1929     /* DCT & quantize */
1930     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1931     {
1932         for (i = 0; i < mb_block_count; i++) {
1933             if (!skip_dct[i]) {
1934                 int overflow;
1935                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1936                 // FIXME: we could decide to change the quantizer instead of
1937                 // clipping
1938                 // JS: I don't think that would be a good idea, it could lower
1939                 //     quality instead of improving it. Just INTRADC clipping
1940                 //     deserves changes in the quantizer
1941                 if (overflow)
1942                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
1943             } else
1944                 s->block_last_index[i] = -1;
1945         }
1946         if (s->avctx->quantizer_noise_shaping) {
1947             for (i = 0; i < mb_block_count; i++) {
1948                 if (!skip_dct[i]) {
1949                     s->block_last_index[i] =
1950                         dct_quantize_refine(s, s->block[i], weight[i],
1951                                             orig[i], i, s->qscale);
1952                 }
1953             }
1954         }
1955
1956         if (s->luma_elim_threshold && !s->mb_intra)
1957             for (i = 0; i < 4; i++)
1958                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
1959         if (s->chroma_elim_threshold && !s->mb_intra)
1960             for (i = 4; i < mb_block_count; i++)
1961                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
1962
1963         if (s->flags & CODEC_FLAG_CBP_RD) {
1964             for (i = 0; i < mb_block_count; i++) {
1965                 if (s->block_last_index[i] == -1)
1966                     s->coded_score[i] = INT_MAX / 256;
1967             }
1968         }
1969     }
1970
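         /* In gray-only mode the intra chroma blocks still need a DC value:
          * 1024 is the DC of a flat block of 128 (mid grey), quantized here
          * with the chroma DC scale. */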
1971     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
1972         s->block_last_index[4] =
1973         s->block_last_index[5] = 0;
1974         s->block[4][0] =
1975         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
1976     }
1977
1978     // FIXME: the non-C quantize code returns an incorrect block_last_index
1979     if (s->alternate_scan && s->dct_quantize != dct_quantize_c) {
1980         for (i = 0; i < mb_block_count; i++) {
1981             int j;
1982             if (s->block_last_index[i] > 0) {
1983                 for (j = 63; j > 0; j--) {
1984                     if (s->block[i][s->intra_scantable.permutated[j]])
1985                         break;
1986                 }
1987                 s->block_last_index[i] = j;
1988             }
1989         }
1990     }
1991
1992     /* huffman encode */
1993     switch(s->codec_id){ //FIXME a function pointer could be slightly faster
1994     case CODEC_ID_MPEG1VIDEO:
1995     case CODEC_ID_MPEG2VIDEO:
1996         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
1997             mpeg1_encode_mb(s, s->block, motion_x, motion_y);
1998         break;
1999     case CODEC_ID_MPEG4:
2000         if (CONFIG_MPEG4_ENCODER)
2001             mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2002         break;
2003     case CODEC_ID_MSMPEG4V2:
2004     case CODEC_ID_MSMPEG4V3:
2005     case CODEC_ID_WMV1:
2006         if (CONFIG_MSMPEG4_ENCODER)
2007             msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2008         break;
2009     case CODEC_ID_WMV2:
2010         if (CONFIG_WMV2_ENCODER)
2011             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2012         break;
2013     case CODEC_ID_H261:
2014         if (CONFIG_H261_ENCODER)
2015             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2016         break;
2017     case CODEC_ID_H263:
2018     case CODEC_ID_H263P:
2019     case CODEC_ID_FLV1:
2020     case CODEC_ID_RV10:
2021     case CODEC_ID_RV20:
2022         if (CONFIG_H263_ENCODER)
2023             h263_encode_mb(s, s->block, motion_x, motion_y);
2024         break;
2025     case CODEC_ID_MJPEG:
2026         if (CONFIG_MJPEG_ENCODER)
2027             ff_mjpeg_encode_mb(s, s->block);
2028         break;
2029     default:
2030         assert(0);
2031     }
2032 }
2033
2034 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2035 {
2036     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2037     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2038 }
2039
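     /* The two helpers below snapshot and restore the small part of the
      * encoder state that encoding a single macroblock mutates, so that
      * candidate MB modes can be trial-encoded and rolled back. */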
2040 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2041     int i;
2042
2043     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2044
2045     /* mpeg1 */
2046     d->mb_skip_run= s->mb_skip_run;
2047     for(i=0; i<3; i++)
2048         d->last_dc[i] = s->last_dc[i];
2049
2050     /* statistics */
2051     d->mv_bits= s->mv_bits;
2052     d->i_tex_bits= s->i_tex_bits;
2053     d->p_tex_bits= s->p_tex_bits;
2054     d->i_count= s->i_count;
2055     d->f_count= s->f_count;
2056     d->b_count= s->b_count;
2057     d->skip_count= s->skip_count;
2058     d->misc_bits= s->misc_bits;
2059     d->last_bits= 0;
2060
2061     d->mb_skipped= 0;
2062     d->qscale= s->qscale;
2063     d->dquant= s->dquant;
2064
2065     d->esc3_level_length= s->esc3_level_length;
2066 }
2067
2068 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2069     int i;
2070
2071     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2072     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2073
2074     /* mpeg1 */
2075     d->mb_skip_run= s->mb_skip_run;
2076     for(i=0; i<3; i++)
2077         d->last_dc[i] = s->last_dc[i];
2078
2079     /* statistics */
2080     d->mv_bits= s->mv_bits;
2081     d->i_tex_bits= s->i_tex_bits;
2082     d->p_tex_bits= s->p_tex_bits;
2083     d->i_count= s->i_count;
2084     d->f_count= s->f_count;
2085     d->b_count= s->b_count;
2086     d->skip_count= s->skip_count;
2087     d->misc_bits= s->misc_bits;
2088
2089     d->mb_intra= s->mb_intra;
2090     d->mb_skipped= s->mb_skipped;
2091     d->mv_type= s->mv_type;
2092     d->mv_dir= s->mv_dir;
2093     d->pb= s->pb;
2094     if(s->data_partitioning){
2095         d->pb2= s->pb2;
2096         d->tex_pb= s->tex_pb;
2097     }
2098     d->block= s->block;
2099     for(i=0; i<8; i++)
2100         d->block_last_index[i]= s->block_last_index[i];
2101     d->interlaced_dct= s->interlaced_dct;
2102     d->qscale= s->qscale;
2103
2104     d->esc3_level_length= s->esc3_level_length;
2105 }
2106
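     /* Trial-encode one macroblock with the given candidate type into one of
      * two scratch bitstreams; for full RD mode decision the MB is also
      * reconstructed so an SSE distortion term can be added to the bit cost.
      * If the resulting score beats *dmin, keep it: update *dmin, flip
      * *next_block and save the context into *best. */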
2107 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2108                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2109                            int *dmin, int *next_block, int motion_x, int motion_y)
2110 {
2111     int score;
2112     uint8_t *dest_backup[3];
2113
2114     copy_context_before_encode(s, backup, type);
2115
2116     s->block= s->blocks[*next_block];
2117     s->pb= pb[*next_block];
2118     if(s->data_partitioning){
2119         s->pb2   = pb2   [*next_block];
2120         s->tex_pb= tex_pb[*next_block];
2121     }
2122
2123     if(*next_block){
2124         memcpy(dest_backup, s->dest, sizeof(s->dest));
2125         s->dest[0] = s->rd_scratchpad;
2126         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2127         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2128         assert(s->linesize >= 32); //FIXME
2129     }
2130
2131     encode_mb(s, motion_x, motion_y);
2132
2133     score= put_bits_count(&s->pb);
2134     if(s->data_partitioning){
2135         score+= put_bits_count(&s->pb2);
2136         score+= put_bits_count(&s->tex_pb);
2137     }
2138
2139     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2140         MPV_decode_mb(s, s->block);
2141
2142         score *= s->lambda2;
2143         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2144     }
2145
2146     if(*next_block){
2147         memcpy(s->dest, dest_backup, sizeof(s->dest));
2148     }
2149
2150     if(score<*dmin){
2151         *dmin= score;
2152         *next_block^=1;
2153
2154         copy_context_after_encode(best, s, type);
2155     }
2156 }
2157
2158 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2159     uint32_t *sq = ff_squareTbl + 256;
2160     int acc=0;
2161     int x,y;
2162
2163     if(w==16 && h==16)
2164         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2165     else if(w==8 && h==8)
2166         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2167
2168     for(y=0; y<h; y++){
2169         for(x=0; x<w; x++){
2170             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2171         }
2172     }
2173
2174     assert(acc>=0);
2175
2176     return acc;
2177 }
2178
2179 static int sse_mb(MpegEncContext *s){
2180     int w= 16;
2181     int h= 16;
2182
2183     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2184     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2185
2186     if(w==16 && h==16)
2187       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2188         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2189                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2190                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2191       }else{
2192         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2193                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2194                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2195       }
2196     else
2197         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2198                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2199                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2200 }
2201
2202 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2203     MpegEncContext *s= *(void**)arg;
2204
2205
2206     s->me.pre_pass=1;
2207     s->me.dia_size= s->avctx->pre_dia_size;
2208     s->first_slice_line=1;
2209     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2210         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2211             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2212         }
2213         s->first_slice_line=0;
2214     }
2215
2216     s->me.pre_pass=0;
2217
2218     return 0;
2219 }
2220
2221 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2222     MpegEncContext *s= *(void**)arg;
2223
2224     ff_check_alignment();
2225
2226     s->me.dia_size= s->avctx->dia_size;
2227     s->first_slice_line=1;
2228     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2229         s->mb_x=0; //for block init below
2230         ff_init_block_index(s);
2231         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2232             s->block_index[0]+=2;
2233             s->block_index[1]+=2;
2234             s->block_index[2]+=2;
2235             s->block_index[3]+=2;
2236
2237             /* compute motion vector & mb_type and store in context */
2238             if(s->pict_type==AV_PICTURE_TYPE_B)
2239                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2240             else
2241                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2242         }
2243         s->first_slice_line=0;
2244     }
2245     return 0;
2246 }
2247
2248 static int mb_var_thread(AVCodecContext *c, void *arg){
2249     MpegEncContext *s= *(void**)arg;
2250     int mb_x, mb_y;
2251
2252     ff_check_alignment();
2253
2254     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2255         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2256             int xx = mb_x * 16;
2257             int yy = mb_y * 16;
2258             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2259             int varc;
2260             int sum = s->dsp.pix_sum(pix, s->linesize);
2261
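                 /* Spatial luma variance of the MB: sum of squares minus
                  * (sum^2)/256, scaled down by 256; the added constants
                  * provide rounding and a small bias. */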
2262             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2263
2264             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2265             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2266             s->me.mb_var_sum_temp    += varc;
2267         }
2268     }
2269     return 0;
2270 }
2271
2272 static void write_slice_end(MpegEncContext *s){
2273     if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4){
2274         if(s->partitioned_frame){
2275             ff_mpeg4_merge_partitions(s);
2276         }
2277
2278         ff_mpeg4_stuffing(&s->pb);
2279     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2280         ff_mjpeg_encode_stuffing(&s->pb);
2281     }
2282
2283     avpriv_align_put_bits(&s->pb);
2284     flush_put_bits(&s->pb);
2285
2286     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2287         s->misc_bits+= get_bits_diff(s);
2288 }
2289
2290 static int encode_thread(AVCodecContext *c, void *arg){
2291     MpegEncContext *s= *(void**)arg;
2292     int mb_x, mb_y, pdif = 0;
2293     int chr_h= 16>>s->chroma_y_shift;
2294     int i, j;
2295     MpegEncContext best_s, backup_s;
2296     uint8_t bit_buf[2][MAX_MB_BYTES];
2297     uint8_t bit_buf2[2][MAX_MB_BYTES];
2298     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2299     PutBitContext pb[2], pb2[2], tex_pb[2];
2300 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2301
2302     ff_check_alignment();
2303
2304     for(i=0; i<2; i++){
2305         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2306         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2307         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2308     }
2309
2310     s->last_bits= put_bits_count(&s->pb);
2311     s->mv_bits=0;
2312     s->misc_bits=0;
2313     s->i_tex_bits=0;
2314     s->p_tex_bits=0;
2315     s->i_count=0;
2316     s->f_count=0;
2317     s->b_count=0;
2318     s->skip_count=0;
2319
2320     for(i=0; i<3; i++){
2321         /* init last dc values */
2322         /* note: quant matrix value (8) is implied here */
2323         s->last_dc[i] = 128 << s->intra_dc_precision;
2324
2325         s->current_picture.f.error[i] = 0;
2326     }
2327     s->mb_skip_run = 0;
2328     memset(s->last_mv, 0, sizeof(s->last_mv));
2329
2330     s->last_mv_dir = 0;
2331
2332     switch(s->codec_id){
2333     case CODEC_ID_H263:
2334     case CODEC_ID_H263P:
2335     case CODEC_ID_FLV1:
2336         if (CONFIG_H263_ENCODER)
2337             s->gob_index = ff_h263_get_gob_height(s);
2338         break;
2339     case CODEC_ID_MPEG4:
2340         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2341             ff_mpeg4_init_partitions(s);
2342         break;
2343     }
2344
2345     s->resync_mb_x=0;
2346     s->resync_mb_y=0;
2347     s->first_slice_line = 1;
2348     s->ptr_lastgob = s->pb.buf;
2349     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2350 //    printf("row %d at %X\n", s->mb_y, (int)s);
2351         s->mb_x=0;
2352         s->mb_y= mb_y;
2353
2354         ff_set_qscale(s, s->qscale);
2355         ff_init_block_index(s);
2356
2357         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2358             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2359             int mb_type= s->mb_type[xy];
2360 //            int d;
2361             int dmin= INT_MAX;
2362             int dir;
2363
2364             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2365                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2366                 return -1;
2367             }
2368             if(s->data_partitioning){
2369                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2370                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2371                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2372                     return -1;
2373                 }
2374             }
2375
2376             s->mb_x = mb_x;
2377             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2378             ff_update_block_index(s);
2379
2380             if(CONFIG_H261_ENCODER && s->codec_id == CODEC_ID_H261){
2381                 ff_h261_reorder_mb_index(s);
2382                 xy= s->mb_y*s->mb_stride + s->mb_x;
2383                 mb_type= s->mb_type[xy];
2384             }
2385
2386             /* write GOB / video packet header */
2387             if(s->rtp_mode){
2388                 int current_packet_size, is_gob_start;
2389
2390                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2391
2392                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2393
2394                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2395
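                     /* Codec-specific restrictions on where a new GOB / slice
                      * may start: H.263 only at the start of a GOB row unless
                      * slice-structured mode is used; MPEG-2 starts one at
                      * each new MB row; neither MPEG-1 nor MPEG-2 starts one
                      * inside a macroblock skip run. */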
2396                 switch(s->codec_id){
2397                 case CODEC_ID_H263:
2398                 case CODEC_ID_H263P:
2399                     if(!s->h263_slice_structured)
2400                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2401                     break;
2402                 case CODEC_ID_MPEG2VIDEO:
2403                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2404                 case CODEC_ID_MPEG1VIDEO:
2405                     if(s->mb_skip_run) is_gob_start=0;
2406                     break;
2407                 }
2408
2409                 if(is_gob_start){
2410                     if(s->start_mb_y != mb_y || mb_x!=0){
2411                         write_slice_end(s);
2412
2413                         if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
2414                             ff_mpeg4_init_partitions(s);
2415                         }
2416                     }
2417
2418                     assert((put_bits_count(&s->pb)&7) == 0);
2419                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2420
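                         /* Optional error-resilience testing: with a nonzero
                          * error_rate, pseudo-randomly drop the packet just
                          * written by rewinding the bitstream to the last GOB
                          * start. */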
2421                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2422                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2423                         int d= 100 / s->avctx->error_rate;
2424                         if(r % d == 0){
2425                             current_packet_size=0;
2426                             s->pb.buf_ptr= s->ptr_lastgob;
2427                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2428                         }
2429                     }
2430
2431                     if (s->avctx->rtp_callback){
2432                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2433                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2434                     }
2435
2436                     switch(s->codec_id){
2437                     case CODEC_ID_MPEG4:
2438                         if (CONFIG_MPEG4_ENCODER) {
2439                             ff_mpeg4_encode_video_packet_header(s);
2440                             ff_mpeg4_clean_buffers(s);
2441                         }
2442                     break;
2443                     case CODEC_ID_MPEG1VIDEO:
2444                     case CODEC_ID_MPEG2VIDEO:
2445                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2446                             ff_mpeg1_encode_slice_header(s);
2447                             ff_mpeg1_clean_buffers(s);
2448                         }
2449                     break;
2450                     case CODEC_ID_H263:
2451                     case CODEC_ID_H263P:
2452                         if (CONFIG_H263_ENCODER)
2453                             h263_encode_gob_header(s, mb_y);
2454                     break;
2455                     }
2456
2457                     if(s->flags&CODEC_FLAG_PASS1){
2458                         int bits= put_bits_count(&s->pb);
2459                         s->misc_bits+= bits - s->last_bits;
2460                         s->last_bits= bits;
2461                     }
2462
2463                     s->ptr_lastgob += current_packet_size;
2464                     s->first_slice_line=1;
2465                     s->resync_mb_x=mb_x;
2466                     s->resync_mb_y=mb_y;
2467                 }
2468             }
2469
2470             if(  (s->resync_mb_x   == s->mb_x)
2471                && s->resync_mb_y+1 == s->mb_y){
2472                 s->first_slice_line=0;
2473             }
2474
2475             s->mb_skipped=0;
2476             s->dquant=0; //only for QP_RD
2477
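                 /* Mode decision: if more than one candidate MB type survived
                  * motion estimation (or QP RD is enabled), each candidate is
                  * trial-encoded with encode_mb_hq() and the cheapest one is
                  * kept; otherwise the single candidate is encoded directly in
                  * the else branch below. */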
2478             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
2479                 int next_block=0;
2480                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2481
2482                 copy_context_before_encode(&backup_s, s, -1);
2483                 backup_s.pb= s->pb;
2484                 best_s.data_partitioning= s->data_partitioning;
2485                 best_s.partitioned_frame= s->partitioned_frame;
2486                 if(s->data_partitioning){
2487                     backup_s.pb2= s->pb2;
2488                     backup_s.tex_pb= s->tex_pb;
2489                 }
2490
2491                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2492                     s->mv_dir = MV_DIR_FORWARD;
2493                     s->mv_type = MV_TYPE_16X16;
2494                     s->mb_intra= 0;
2495                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2496                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2497                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2498                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2499                 }
2500                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2501                     s->mv_dir = MV_DIR_FORWARD;
2502                     s->mv_type = MV_TYPE_FIELD;
2503                     s->mb_intra= 0;
2504                     for(i=0; i<2; i++){
2505                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2506                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2507                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2508                     }
2509                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2510                                  &dmin, &next_block, 0, 0);
2511                 }
2512                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2513                     s->mv_dir = MV_DIR_FORWARD;
2514                     s->mv_type = MV_TYPE_16X16;
2515                     s->mb_intra= 0;
2516                     s->mv[0][0][0] = 0;
2517                     s->mv[0][0][1] = 0;
2518                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2519                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2520                 }
2521                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2522                     s->mv_dir = MV_DIR_FORWARD;
2523                     s->mv_type = MV_TYPE_8X8;
2524                     s->mb_intra= 0;
2525                     for(i=0; i<4; i++){
2526                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2527                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2528                     }
2529                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2530                                  &dmin, &next_block, 0, 0);
2531                 }
2532                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2533                     s->mv_dir = MV_DIR_FORWARD;
2534                     s->mv_type = MV_TYPE_16X16;
2535                     s->mb_intra= 0;
2536                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2537                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2538                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2539                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2540                 }
2541                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2542                     s->mv_dir = MV_DIR_BACKWARD;
2543                     s->mv_type = MV_TYPE_16X16;
2544                     s->mb_intra= 0;
2545                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2546                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2547                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2548                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2549                 }
2550                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2551                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2552                     s->mv_type = MV_TYPE_16X16;
2553                     s->mb_intra= 0;
2554                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2555                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2556                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2557                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2558                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2559                                  &dmin, &next_block, 0, 0);
2560                 }
2561                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2562                     s->mv_dir = MV_DIR_FORWARD;
2563                     s->mv_type = MV_TYPE_FIELD;
2564                     s->mb_intra= 0;
2565                     for(i=0; i<2; i++){
2566                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2567                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2568                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2569                     }
2570                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2571                                  &dmin, &next_block, 0, 0);
2572                 }
2573                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2574                     s->mv_dir = MV_DIR_BACKWARD;
2575                     s->mv_type = MV_TYPE_FIELD;
2576                     s->mb_intra= 0;
2577                     for(i=0; i<2; i++){
2578                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2579                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2580                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2581                     }
2582                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2583                                  &dmin, &next_block, 0, 0);
2584                 }
2585                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2586                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2587                     s->mv_type = MV_TYPE_FIELD;
2588                     s->mb_intra= 0;
2589                     for(dir=0; dir<2; dir++){
2590                         for(i=0; i<2; i++){
2591                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2592                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2593                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2594                         }
2595                     }
2596                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2597                                  &dmin, &next_block, 0, 0);
2598                 }
2599                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2600                     s->mv_dir = 0;
2601                     s->mv_type = MV_TYPE_16X16;
2602                     s->mb_intra= 1;
2603                     s->mv[0][0][0] = 0;
2604                     s->mv[0][0][1] = 0;
2605                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2606                                  &dmin, &next_block, 0, 0);
2607                     if(s->h263_pred || s->h263_aic){
2608                         if(best_s.mb_intra)
2609                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2610                         else
2611                             ff_clean_intra_table_entries(s); //old mode?
2612                     }
2613                 }
2614
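                     /* QP RD: retry the best 16x16 mode with qscale offsets
                      * from dquant_tab around the previous qscale, saving and
                      * restoring the DC/AC prediction state for intra MBs so
                      * each trial starts from the same predictors. */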
2615                 if((s->flags & CODEC_FLAG_QP_RD) && dmin < INT_MAX){
2616                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2617                         const int last_qp= backup_s.qscale;
2618                         int qpi, qp, dc[6];
2619                         DCTELEM ac[6][16];
2620                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2621                         static const int dquant_tab[4]={-1,1,-2,2};
2622
2623                         assert(backup_s.dquant == 0);
2624
2625                         //FIXME intra
2626                         s->mv_dir= best_s.mv_dir;
2627                         s->mv_type = MV_TYPE_16X16;
2628                         s->mb_intra= best_s.mb_intra;
2629                         s->mv[0][0][0] = best_s.mv[0][0][0];
2630                         s->mv[0][0][1] = best_s.mv[0][0][1];
2631                         s->mv[1][0][0] = best_s.mv[1][0][0];
2632                         s->mv[1][0][1] = best_s.mv[1][0][1];
2633
2634                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2635                         for(; qpi<4; qpi++){
2636                             int dquant= dquant_tab[qpi];
2637                             qp= last_qp + dquant;
2638                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2639                                 continue;
2640                             backup_s.dquant= dquant;
2641                             if(s->mb_intra && s->dc_val[0]){
2642                                 for(i=0; i<6; i++){
2643                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2644                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2645                                 }
2646                             }
2647
2648                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2649                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2650                             if(best_s.qscale != qp){
2651                                 if(s->mb_intra && s->dc_val[0]){
2652                                     for(i=0; i<6; i++){
2653                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2654                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2655                                     }
2656                                 }
2657                             }
2658                         }
2659                     }
2660                 }
2661                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2662                     int mx= s->b_direct_mv_table[xy][0];
2663                     int my= s->b_direct_mv_table[xy][1];
2664
2665                     backup_s.dquant = 0;
2666                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2667                     s->mb_intra= 0;
2668                     ff_mpeg4_set_direct_mv(s, mx, my);
2669                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2670                                  &dmin, &next_block, mx, my);
2671                 }
2672                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2673                     backup_s.dquant = 0;
2674                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2675                     s->mb_intra= 0;
2676                     ff_mpeg4_set_direct_mv(s, 0, 0);
2677                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2678                                  &dmin, &next_block, 0, 0);
2679                 }
2680                 if(!best_s.mb_intra && s->flags2&CODEC_FLAG2_SKIP_RD){
2681                     int coded=0;
2682                     for(i=0; i<6; i++)
2683                         coded |= s->block_last_index[i];
2684                     if(coded){
2685                         int mx,my;
2686                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2687                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2688                             mx=my=0; //FIXME find the one we actually used
2689                             ff_mpeg4_set_direct_mv(s, mx, my);
2690                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2691                             mx= s->mv[1][0][0];
2692                             my= s->mv[1][0][1];
2693                         }else{
2694                             mx= s->mv[0][0][0];
2695                             my= s->mv[0][0][1];
2696                         }
2697
2698                         s->mv_dir= best_s.mv_dir;
2699                         s->mv_type = best_s.mv_type;
2700                         s->mb_intra= 0;
2701 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2702                         s->mv[0][0][1] = best_s.mv[0][0][1];
2703                         s->mv[1][0][0] = best_s.mv[1][0][0];
2704                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2705                         backup_s.dquant= 0;
2706                         s->skipdct=1;
2707                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2708                                         &dmin, &next_block, mx, my);
2709                         s->skipdct=0;
2710                     }
2711                 }
2712
2713                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2714
2715                 copy_context_after_encode(s, &best_s, -1);
2716
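                /* Append the bitstream of the winning candidate (kept in bit_buf[next_block^1])
                 * to the real PutBitContext and restore it as s->pb. */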
2717                 pb_bits_count= put_bits_count(&s->pb);
2718                 flush_put_bits(&s->pb);
2719                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2720                 s->pb= backup_s.pb;
2721
2722                 if(s->data_partitioning){
2723                     pb2_bits_count= put_bits_count(&s->pb2);
2724                     flush_put_bits(&s->pb2);
2725                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2726                     s->pb2= backup_s.pb2;
2727
2728                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2729                     flush_put_bits(&s->tex_pb);
2730                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2731                     s->tex_pb= backup_s.tex_pb;
2732                 }
2733                 s->last_bits= put_bits_count(&s->pb);
2734
2735                 if (CONFIG_H263_ENCODER &&
2736                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2737                     ff_h263_update_motion_val(s);
2738
2739                 if(next_block==0){ //FIXME 16 vs linesize16
2740                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2741                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2742                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2743                 }
2744
2745                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2746                     MPV_decode_mb(s, s->block);
2747             } else {
2748                 int motion_x = 0, motion_y = 0;
2749                 s->mv_type=MV_TYPE_16X16;
2750                 // only one MB-Type possible
2751
2752                 switch(mb_type){
2753                 case CANDIDATE_MB_TYPE_INTRA:
2754                     s->mv_dir = 0;
2755                     s->mb_intra= 1;
2756                     motion_x= s->mv[0][0][0] = 0;
2757                     motion_y= s->mv[0][0][1] = 0;
2758                     break;
2759                 case CANDIDATE_MB_TYPE_INTER:
2760                     s->mv_dir = MV_DIR_FORWARD;
2761                     s->mb_intra= 0;
2762                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2763                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2764                     break;
2765                 case CANDIDATE_MB_TYPE_INTER_I:
2766                     s->mv_dir = MV_DIR_FORWARD;
2767                     s->mv_type = MV_TYPE_FIELD;
2768                     s->mb_intra= 0;
2769                     for(i=0; i<2; i++){
2770                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2771                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2772                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2773                     }
2774                     break;
2775                 case CANDIDATE_MB_TYPE_INTER4V:
2776                     s->mv_dir = MV_DIR_FORWARD;
2777                     s->mv_type = MV_TYPE_8X8;
2778                     s->mb_intra= 0;
2779                     for(i=0; i<4; i++){
2780                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2781                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2782                     }
2783                     break;
2784                 case CANDIDATE_MB_TYPE_DIRECT:
2785                     if (CONFIG_MPEG4_ENCODER) {
2786                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2787                         s->mb_intra= 0;
2788                         motion_x=s->b_direct_mv_table[xy][0];
2789                         motion_y=s->b_direct_mv_table[xy][1];
2790                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2791                     }
2792                     break;
2793                 case CANDIDATE_MB_TYPE_DIRECT0:
2794                     if (CONFIG_MPEG4_ENCODER) {
2795                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2796                         s->mb_intra= 0;
2797                         ff_mpeg4_set_direct_mv(s, 0, 0);
2798                     }
2799                     break;
2800                 case CANDIDATE_MB_TYPE_BIDIR:
2801                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2802                     s->mb_intra= 0;
2803                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2804                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2805                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2806                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2807                     break;
2808                 case CANDIDATE_MB_TYPE_BACKWARD:
2809                     s->mv_dir = MV_DIR_BACKWARD;
2810                     s->mb_intra= 0;
2811                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2812                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2813                     break;
2814                 case CANDIDATE_MB_TYPE_FORWARD:
2815                     s->mv_dir = MV_DIR_FORWARD;
2816                     s->mb_intra= 0;
2817                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2818                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2819 //                    printf(" %d %d ", motion_x, motion_y);
2820                     break;
2821                 case CANDIDATE_MB_TYPE_FORWARD_I:
2822                     s->mv_dir = MV_DIR_FORWARD;
2823                     s->mv_type = MV_TYPE_FIELD;
2824                     s->mb_intra= 0;
2825                     for(i=0; i<2; i++){
2826                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2827                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2828                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2829                     }
2830                     break;
2831                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2832                     s->mv_dir = MV_DIR_BACKWARD;
2833                     s->mv_type = MV_TYPE_FIELD;
2834                     s->mb_intra= 0;
2835                     for(i=0; i<2; i++){
2836                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2837                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2838                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2839                     }
2840                     break;
2841                 case CANDIDATE_MB_TYPE_BIDIR_I:
2842                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2843                     s->mv_type = MV_TYPE_FIELD;
2844                     s->mb_intra= 0;
2845                     for(dir=0; dir<2; dir++){
2846                         for(i=0; i<2; i++){
2847                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2848                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2849                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2850                         }
2851                     }
2852                     break;
2853                 default:
2854                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2855                 }
2856
2857                 encode_mb(s, motion_x, motion_y);
2858
2859                 // RAL: remember the MV direction of the last macroblock
2860                 s->last_mv_dir = s->mv_dir;
2861
2862                 if (CONFIG_H263_ENCODER &&
2863                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2864                     ff_h263_update_motion_val(s);
2865
2866                 MPV_decode_mb(s, s->block);
2867             }
2868
2869             /* clean the MV table in I/P/S frames; it is needed by direct mode in B-frames */
2870             if(s->mb_intra /* && I,P,S_TYPE */){
2871                 s->p_mv_table[xy][0]=0;
2872                 s->p_mv_table[xy][1]=0;
2873             }
2874
2875             if(s->flags&CODEC_FLAG_PSNR){
2876                 int w= 16;
2877                 int h= 16;
2878
2879                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2880                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2881
2882                 s->current_picture.f.error[0] += sse(
2883                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2884                     s->dest[0], w, h, s->linesize);
2885                 s->current_picture.f.error[1] += sse(
2886                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2887                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2888                 s->current_picture.f.error[2] += sse(
2889                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2890                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2891             }
2892             if(s->loop_filter){
2893                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2894                     ff_h263_loop_filter(s);
2895             }
2896 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
2897         }
2898     }
2899
2900     // not pretty, but this must be written before the flush below, so it has to go here
2901     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
2902         msmpeg4_encode_ext_header(s);
2903
2904     write_slice_end(s);
2905
2906     /* Send the last GOB if RTP */
2907     if (s->avctx->rtp_callback) {
2908         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
2909         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
2910         /* Call the RTP callback to send the last GOB */
2911         emms_c();
2912         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
2913     }
2914
2915     return 0;
2916 }
2917
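/* MERGE() folds a field of a slice-thread context into the main context and clears
 * it in the source; merge_context_after_me() does this for the ME statistics. */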
2918 #define MERGE(field) dst->field += src->field; src->field=0
2919 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
2920     MERGE(me.scene_change_score);
2921     MERGE(me.mc_mb_var_sum_temp);
2922     MERGE(me.mb_var_sum_temp);
2923 }
2924
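/* Merge the per-slice statistics (bit counts, error sums, noise-reduction data)
 * of a thread context into the main context and append its bitstream to the
 * main PutBitContext. */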
2925 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
2926     int i;
2927
2928     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
2929     MERGE(dct_count[1]);
2930     MERGE(mv_bits);
2931     MERGE(i_tex_bits);
2932     MERGE(p_tex_bits);
2933     MERGE(i_count);
2934     MERGE(f_count);
2935     MERGE(b_count);
2936     MERGE(skip_count);
2937     MERGE(misc_bits);
2938     MERGE(error_count);
2939     MERGE(padding_bug_score);
2940     MERGE(current_picture.f.error[0]);
2941     MERGE(current_picture.f.error[1]);
2942     MERGE(current_picture.f.error[2]);
2943
2944     if(dst->avctx->noise_reduction){
2945         for(i=0; i<64; i++){
2946             MERGE(dct_error_sum[0][i]);
2947             MERGE(dct_error_sum[1][i]);
2948         }
2949     }
2950
2951     assert(put_bits_count(&src->pb) % 8 ==0);
2952     assert(put_bits_count(&dst->pb) % 8 ==0);
2953     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
2954     flush_put_bits(&dst->pb);
2955 }
2956
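/* Pick the quality (lambda/qscale) for the current picture: either a forced
 * next_lambda, the rate-control estimate, or the fixed qscale; with adaptive
 * quantization the per-MB qscale table is cleaned up as well. */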
2957 static int estimate_qp(MpegEncContext *s, int dry_run){
2958     if (s->next_lambda){
2959         s->current_picture_ptr->f.quality =
2960         s->current_picture.f.quality = s->next_lambda;
2961         if(!dry_run) s->next_lambda= 0;
2962     } else if (!s->fixed_qscale) {
2963         s->current_picture_ptr->f.quality =
2964         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
2965         if (s->current_picture.f.quality < 0)
2966             return -1;
2967     }
2968
2969     if(s->adaptive_quant){
2970         switch(s->codec_id){
2971         case CODEC_ID_MPEG4:
2972             if (CONFIG_MPEG4_ENCODER)
2973                 ff_clean_mpeg4_qscales(s);
2974             break;
2975         case CODEC_ID_H263:
2976         case CODEC_ID_H263P:
2977         case CODEC_ID_FLV1:
2978             if (CONFIG_H263_ENCODER)
2979                 ff_clean_h263_qscales(s);
2980             break;
2981         default:
2982             ff_init_qscale_tab(s);
2983         }
2984
2985         s->lambda= s->lambda_table[0];
2986         //FIXME broken
2987     }else
2988         s->lambda = s->current_picture.f.quality;
2989 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
2990     update_qscale(s);
2991     return 0;
2992 }
2993
2994 /* must be called before writing the header */
2995 static void set_frame_distances(MpegEncContext * s){
2996     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
2997     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
2998
2999     if(s->pict_type==AV_PICTURE_TYPE_B){
3000         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3001         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3002     }else{
3003         s->pp_time= s->time - s->last_non_b_time;
3004         s->last_non_b_time= s->time;
3005         assert(s->picture_number==0 || s->pp_time > 0);
3006     }
3007 }
3008
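/* Encode one picture: run motion estimation, handle scene-change promotion to an
 * I-frame, choose f_code/b_code, write the picture header and then encode all
 * macroblocks, possibly using several slice-thread contexts. */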
3009 static int encode_picture(MpegEncContext *s, int picture_number)
3010 {
3011     int i;
3012     int bits;
3013     int context_count = s->slice_context_count;
3014
3015     s->picture_number = picture_number;
3016
3017     /* Reset the average MB variance */
3018     s->me.mb_var_sum_temp    =
3019     s->me.mc_mb_var_sum_temp = 0;
3020
3021     /* we need to initialize some time variables before we can encode B-frames */
3022     // RAL: Condition added for MPEG1VIDEO
3023     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3024         set_frame_distances(s);
3025     if(CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4)
3026         ff_set_mpeg4_time(s);
3027
3028     s->me.scene_change_score=0;
3029
3030 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3031
3032     if(s->pict_type==AV_PICTURE_TYPE_I){
3033         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3034         else                        s->no_rounding=0;
3035     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3036         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
3037             s->no_rounding ^= 1;
3038     }
3039
3040     if(s->flags & CODEC_FLAG_PASS2){
3041         if (estimate_qp(s,1) < 0)
3042             return -1;
3043         ff_get_2pass_fcode(s);
3044     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3045         if(s->pict_type==AV_PICTURE_TYPE_B)
3046             s->lambda= s->last_lambda_for[s->pict_type];
3047         else
3048             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3049         update_qscale(s);
3050     }
3051
3052     s->mb_intra=0; //for the rate distortion & bit compare functions
3053     for(i=1; i<context_count; i++){
3054         ff_update_duplicate_context(s->thread_context[i], s);
3055     }
3056
3057     if(ff_init_me(s)<0)
3058         return -1;
3059
3060     /* Estimate motion for every MB */
3061     if(s->pict_type != AV_PICTURE_TYPE_I){
3062         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3063         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3064         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3065             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3066                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3067             }
3068         }
3069
3070         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3071     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3072         /* I-Frame */
3073         for(i=0; i<s->mb_stride*s->mb_height; i++)
3074             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3075
3076         if(!s->fixed_qscale){
3077             /* finding spatial complexity for I-frame rate control */
3078             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3079         }
3080     }
3081     for(i=1; i<context_count; i++){
3082         merge_context_after_me(s, s->thread_context[i]);
3083     }
3084     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3085     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3086     emms_c();
3087
3088     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3089         s->pict_type= AV_PICTURE_TYPE_I;
3090         for(i=0; i<s->mb_stride*s->mb_height; i++)
3091             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3092 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3093     }
3094
3095     if(!s->umvplus){
3096         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3097             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3098
3099             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3100                 int a,b;
3101                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3102                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3103                 s->f_code= FFMAX3(s->f_code, a, b);
3104             }
3105
3106             ff_fix_long_p_mvs(s);
3107             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3108             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3109                 int j;
3110                 for(i=0; i<2; i++){
3111                     for(j=0; j<2; j++)
3112                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3113                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3114                 }
3115             }
3116         }
3117
3118         if(s->pict_type==AV_PICTURE_TYPE_B){
3119             int a, b;
3120
3121             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3122             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3123             s->f_code = FFMAX(a, b);
3124
3125             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3126             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3127             s->b_code = FFMAX(a, b);
3128
3129             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3130             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3131             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3132             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3133             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3134                 int dir, j;
3135                 for(dir=0; dir<2; dir++){
3136                     for(i=0; i<2; i++){
3137                         for(j=0; j<2; j++){
3138                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3139                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3140                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3141                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3142                         }
3143                     }
3144                 }
3145             }
3146         }
3147     }
3148
3149     if (estimate_qp(s, 0) < 0)
3150         return -1;
3151
3152     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3153         s->qscale= 3; //reduce clipping problems
3154
3155     if (s->out_format == FMT_MJPEG) {
3156         /* for mjpeg, we do include qscale in the matrix */
3157         for(i=1;i<64;i++){
3158             int j= s->dsp.idct_permutation[i];
3159
3160             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3161         }
3162         s->y_dc_scale_table=
3163         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3164         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3165         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3166                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3167         s->qscale= 8;
3168     }
3169
3170     //FIXME var duplication
3171     s->current_picture_ptr->f.key_frame =
3172     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3173     s->current_picture_ptr->f.pict_type =
3174     s->current_picture.f.pict_type = s->pict_type;
3175
3176     if (s->current_picture.f.key_frame)
3177         s->picture_in_gop_number=0;
3178
3179     s->last_bits= put_bits_count(&s->pb);
3180     switch(s->out_format) {
3181     case FMT_MJPEG:
3182         if (CONFIG_MJPEG_ENCODER)
3183             ff_mjpeg_encode_picture_header(s);
3184         break;
3185     case FMT_H261:
3186         if (CONFIG_H261_ENCODER)
3187             ff_h261_encode_picture_header(s, picture_number);
3188         break;
3189     case FMT_H263:
3190         if (CONFIG_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
3191             ff_wmv2_encode_picture_header(s, picture_number);
3192         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3193             msmpeg4_encode_picture_header(s, picture_number);
3194         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3195             mpeg4_encode_picture_header(s, picture_number);
3196         else if (CONFIG_RV10_ENCODER && s->codec_id == CODEC_ID_RV10)
3197             rv10_encode_picture_header(s, picture_number);
3198         else if (CONFIG_RV20_ENCODER && s->codec_id == CODEC_ID_RV20)
3199             rv20_encode_picture_header(s, picture_number);
3200         else if (CONFIG_FLV_ENCODER && s->codec_id == CODEC_ID_FLV1)
3201             ff_flv_encode_picture_header(s, picture_number);
3202         else if (CONFIG_H263_ENCODER)
3203             h263_encode_picture_header(s, picture_number);
3204         break;
3205     case FMT_MPEG1:
3206         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3207             mpeg1_encode_picture_header(s, picture_number);
3208         break;
3209     case FMT_H264:
3210         break;
3211     default:
3212         assert(0);
3213     }
3214     bits= put_bits_count(&s->pb);
3215     s->header_bits= bits - s->last_bits;
3216
3217     for(i=1; i<context_count; i++){
3218         update_duplicate_context_after_me(s->thread_context[i], s);
3219     }
3220     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3221     for(i=1; i<context_count; i++){
3222         merge_context_after_encode(s, s->thread_context[i]);
3223     }
3224     emms_c();
3225     return 0;
3226 }
3227
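/* Noise reduction: accumulate the magnitude of each DCT coefficient in
 * dct_error_sum and shrink the coefficient towards zero by the adaptive
 * per-coefficient offset. */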
3228 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3229     const int intra= s->mb_intra;
3230     int i;
3231
3232     s->dct_count[intra]++;
3233
3234     for(i=0; i<64; i++){
3235         int level= block[i];
3236
3237         if(level){
3238             if(level>0){
3239                 s->dct_error_sum[intra][i] += level;
3240                 level -= s->dct_offset[intra][i];
3241                 if(level<0) level=0;
3242             }else{
3243                 s->dct_error_sum[intra][i] -= level;
3244                 level += s->dct_offset[intra][i];
3245                 if(level>0) level=0;
3246             }
3247             block[i]= level;
3248         }
3249     }
3250 }
3251
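/* Rate-distortion optimal quantization: quantize the block, then run a trellis
 * (Viterbi-style) search over the candidate levels/runs of each coefficient,
 * keeping a list of survivor states, and pick the coding with the lowest
 * distortion + lambda * rate. */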
3252 static int dct_quantize_trellis_c(MpegEncContext *s,
3253                                   DCTELEM *block, int n,
3254                                   int qscale, int *overflow){
3255     const int *qmat;
3256     const uint8_t *scantable= s->intra_scantable.scantable;
3257     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3258     int max=0;
3259     unsigned int threshold1, threshold2;
3260     int bias=0;
3261     int run_tab[65];
3262     int level_tab[65];
3263     int score_tab[65];
3264     int survivor[65];
3265     int survivor_count;
3266     int last_run=0;
3267     int last_level=0;
3268     int last_score= 0;
3269     int last_i;
3270     int coeff[2][64];
3271     int coeff_count[64];
3272     int qmul, qadd, start_i, last_non_zero, i, dc;
3273     const int esc_length= s->ac_esc_length;
3274     uint8_t * length;
3275     uint8_t * last_length;
3276     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3277
3278     s->dsp.fdct (block);
3279
3280     if(s->dct_error_sum)
3281         s->denoise_dct(s, block);
3282     qmul= qscale*16;
3283     qadd= ((qscale-1)|1)*8;
3284
3285     if (s->mb_intra) {
3286         int q;
3287         if (!s->h263_aic) {
3288             if (n < 4)
3289                 q = s->y_dc_scale;
3290             else
3291                 q = s->c_dc_scale;
3292             q = q << 3;
3293         } else{
3294             /* For AIC we skip quant/dequant of INTRADC */
3295             q = 1 << 3;
3296             qadd=0;
3297         }
3298
3299         /* note: block[0] is assumed to be positive */
3300         block[0] = (block[0] + (q >> 1)) / q;
3301         start_i = 1;
3302         last_non_zero = 0;
3303         qmat = s->q_intra_matrix[qscale];
3304         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3305             bias= 1<<(QMAT_SHIFT-1);
3306         length     = s->intra_ac_vlc_length;
3307         last_length= s->intra_ac_vlc_last_length;
3308     } else {
3309         start_i = 0;
3310         last_non_zero = -1;
3311         qmat = s->q_inter_matrix[qscale];
3312         length     = s->inter_ac_vlc_length;
3313         last_length= s->inter_ac_vlc_last_length;
3314     }
3315     last_i= start_i;
3316
3317     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3318     threshold2= (threshold1<<1);
3319
3320     for(i=63; i>=start_i; i--) {
3321         const int j = scantable[i];
3322         int level = block[j] * qmat[j];
3323
3324         if(((unsigned)(level+threshold1))>threshold2){
3325             last_non_zero = i;
3326             break;
3327         }
3328     }
3329
3330     for(i=start_i; i<=last_non_zero; i++) {
3331         const int j = scantable[i];
3332         int level = block[j] * qmat[j];
3333
3334 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3335 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3336         if(((unsigned)(level+threshold1))>threshold2){
3337             if(level>0){
3338                 level= (bias + level)>>QMAT_SHIFT;
3339                 coeff[0][i]= level;
3340                 coeff[1][i]= level-1;
3341 //                coeff[2][k]= level-2;
3342             }else{
3343                 level= (bias - level)>>QMAT_SHIFT;
3344                 coeff[0][i]= -level;
3345                 coeff[1][i]= -level+1;
3346 //                coeff[2][k]= -level+2;
3347             }
3348             coeff_count[i]= FFMIN(level, 2);
3349             assert(coeff_count[i]);
3350             max |=level;
3351         }else{
3352             coeff[0][i]= (level>>31)|1;
3353             coeff_count[i]= 1;
3354         }
3355     }
3356
3357     *overflow= s->max_qcoeff < max; //overflow might have happened
3358
3359     if(last_non_zero < start_i){
3360         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3361         return last_non_zero;
3362     }
3363
3364     score_tab[start_i]= 0;
3365     survivor[0]= start_i;
3366     survivor_count= 1;
3367
3368     for(i=start_i; i<=last_non_zero; i++){
3369         int level_index, j, zero_distortion;
3370         int dct_coeff= FFABS(block[ scantable[i] ]);
3371         int best_score=256*256*256*120;
3372
3373         if (   s->dsp.fdct == fdct_ifast
3374 #ifndef FAAN_POSTSCALE
3375             || s->dsp.fdct == ff_faandct
3376 #endif
3377            )
3378             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3379         zero_distortion= dct_coeff*dct_coeff;
3380
3381         for(level_index=0; level_index < coeff_count[i]; level_index++){
3382             int distortion;
3383             int level= coeff[level_index][i];
3384             const int alevel= FFABS(level);
3385             int unquant_coeff;
3386
3387             assert(level);
3388
3389             if(s->out_format == FMT_H263){
3390                 unquant_coeff= alevel*qmul + qadd;
3391             }else{ //MPEG1
3392                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3393                 if(s->mb_intra){
3394                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3395                         unquant_coeff =   (unquant_coeff - 1) | 1;
3396                 }else{
3397                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3398                         unquant_coeff =   (unquant_coeff - 1) | 1;
3399                 }
3400                 unquant_coeff<<= 3;
3401             }
3402
3403             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3404             level+=64;
3405             if((level&(~127)) == 0){
3406                 for(j=survivor_count-1; j>=0; j--){
3407                     int run= i - survivor[j];
3408                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3409                     score += score_tab[i-run];
3410
3411                     if(score < best_score){
3412                         best_score= score;
3413                         run_tab[i+1]= run;
3414                         level_tab[i+1]= level-64;
3415                     }
3416                 }
3417
3418                 if(s->out_format == FMT_H263){
3419                     for(j=survivor_count-1; j>=0; j--){
3420                         int run= i - survivor[j];
3421                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3422                         score += score_tab[i-run];
3423                         if(score < last_score){
3424                             last_score= score;
3425                             last_run= run;
3426                             last_level= level-64;
3427                             last_i= i+1;
3428                         }
3429                     }
3430                 }
3431             }else{
3432                 distortion += esc_length*lambda;
3433                 for(j=survivor_count-1; j>=0; j--){
3434                     int run= i - survivor[j];
3435                     int score= distortion + score_tab[i-run];
3436
3437                     if(score < best_score){
3438                         best_score= score;
3439                         run_tab[i+1]= run;
3440                         level_tab[i+1]= level-64;
3441                     }
3442                 }
3443
3444                 if(s->out_format == FMT_H263){
3445                   for(j=survivor_count-1; j>=0; j--){
3446                         int run= i - survivor[j];
3447                         int score= distortion + score_tab[i-run];
3448                         if(score < last_score){
3449                             last_score= score;
3450                             last_run= run;
3451                             last_level= level-64;
3452                             last_i= i+1;
3453                         }
3454                     }
3455                 }
3456             }
3457         }
3458
3459         score_tab[i+1]= best_score;
3460
3461                 //Note: there is a VLC code in MPEG-4 which is 1 bit shorter than another one with a shorter run and the same level
3462         if(last_non_zero <= 27){
3463             for(; survivor_count; survivor_count--){
3464                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3465                     break;
3466             }
3467         }else{
3468             for(; survivor_count; survivor_count--){
3469                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3470                     break;
3471             }
3472         }
3473
3474         survivor[ survivor_count++ ]= i+1;
3475     }
3476
3477     if(s->out_format != FMT_H263){
3478         last_score= 256*256*256*120;
3479         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3480             int score= score_tab[i];
3481             if(i) score += lambda*2; //FIXME more exact?
3482
3483             if(score < last_score){
3484                 last_score= score;
3485                 last_i= i;
3486                 last_level= level_tab[i];
3487                 last_run= run_tab[i];
3488             }
3489         }
3490     }
3491
3492     s->coded_score[n] = last_score;
3493
3494     dc= FFABS(block[0]);
3495     last_non_zero= last_i - 1;
3496     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3497
3498     if(last_non_zero < start_i)
3499         return last_non_zero;
3500
3501     if(last_non_zero == 0 && start_i == 0){
3502         int best_level= 0;
3503         int best_score= dc * dc;
3504
3505         for(i=0; i<coeff_count[0]; i++){
3506             int level= coeff[i][0];
3507             int alevel= FFABS(level);
3508             int unquant_coeff, score, distortion;
3509
3510             if(s->out_format == FMT_H263){
3511                     unquant_coeff= (alevel*qmul + qadd)>>3;
3512             }else{ //MPEG1
3513                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3514                     unquant_coeff =   (unquant_coeff - 1) | 1;
3515             }
3516             unquant_coeff = (unquant_coeff + 4) >> 3;
3517             unquant_coeff<<= 3 + 3;
3518
3519             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3520             level+=64;
3521             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3522             else                    score= distortion + esc_length*lambda;
3523
3524             if(score < best_score){
3525                 best_score= score;
3526                 best_level= level - 64;
3527             }
3528         }
3529         block[0]= best_level;
3530         s->coded_score[n] = best_score - dc*dc;
3531         if(best_level == 0) return -1;
3532         else                return last_non_zero;
3533     }
3534
3535     i= last_i;
3536     assert(last_level);
3537
3538     block[ perm_scantable[last_non_zero] ]= last_level;
3539     i -= last_run + 1;
3540
3541     for(; i>start_i; i -= run_tab[i] + 1){
3542         block[ perm_scantable[i-1] ]= level_tab[i];
3543     }
3544
3545     return last_non_zero;
3546 }
3547
3548 //#define REFINE_STATS 1
3549 static int16_t basis[64][64];
3550
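/* Precompute the 64 8x8 DCT basis functions (scaled, in IDCT permutation order);
 * dct_quantize_refine() uses them to update the reconstruction error when a
 * single coefficient is changed. */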
3551 static void build_basis(uint8_t *perm){
3552     int i, j, x, y;
3553     emms_c();
3554     for(i=0; i<8; i++){
3555         for(j=0; j<8; j++){
3556             for(y=0; y<8; y++){
3557                 for(x=0; x<8; x++){
3558                     double s= 0.25*(1<<BASIS_SHIFT);
3559                     int index= 8*i + j;
3560                     int perm_index= perm[index];
3561                     if(i==0) s*= sqrt(0.5);
3562                     if(j==0) s*= sqrt(0.5);
3563                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3564                 }
3565             }
3566         }
3567     }
3568 }
3569
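/* Refine an already quantized block: repeatedly try changing single coefficients
 * by +/-1 (or toggling them to/from zero) and keep a change whenever it lowers
 * the combined distortion + lambda * rate score, until no change helps. */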
3570 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3571                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3572                         int n, int qscale){
3573     int16_t rem[64];
3574     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3575     const uint8_t *scantable= s->intra_scantable.scantable;
3576     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3577 //    unsigned int threshold1, threshold2;
3578 //    int bias=0;
3579     int run_tab[65];
3580     int prev_run=0;
3581     int prev_level=0;
3582     int qmul, qadd, start_i, last_non_zero, i, dc;
3583     uint8_t * length;
3584     uint8_t * last_length;
3585     int lambda;
3586     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3587 #ifdef REFINE_STATS
3588 static int count=0;
3589 static int after_last=0;
3590 static int to_zero=0;
3591 static int from_zero=0;
3592 static int raise=0;
3593 static int lower=0;
3594 static int messed_sign=0;
3595 #endif
3596
3597     if(basis[0][0] == 0)
3598         build_basis(s->dsp.idct_permutation);
3599
3600     qmul= qscale*2;
3601     qadd= (qscale-1)|1;
3602     if (s->mb_intra) {
3603         if (!s->h263_aic) {
3604             if (n < 4)
3605                 q = s->y_dc_scale;
3606             else
3607                 q = s->c_dc_scale;
3608         } else{
3609             /* For AIC we skip quant/dequant of INTRADC */
3610             q = 1;
3611             qadd=0;
3612         }
3613         q <<= RECON_SHIFT-3;
3614         /* note: block[0] is assumed to be positive */
3615         dc= block[0]*q;
3616 //        block[0] = (block[0] + (q >> 1)) / q;
3617         start_i = 1;
3618 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3619 //            bias= 1<<(QMAT_SHIFT-1);
3620         length     = s->intra_ac_vlc_length;
3621         last_length= s->intra_ac_vlc_last_length;
3622     } else {
3623         dc= 0;
3624         start_i = 0;
3625         length     = s->inter_ac_vlc_length;
3626         last_length= s->inter_ac_vlc_last_length;
3627     }
3628     last_non_zero = s->block_last_index[n];
3629
3630 #ifdef REFINE_STATS
3631 {START_TIMER
3632 #endif
3633     dc += (1<<(RECON_SHIFT-1));
3634     for(i=0; i<64; i++){
3635         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
3636     }
3637 #ifdef REFINE_STATS
3638 STOP_TIMER("memset rem[]")}
3639 #endif
3640     sum=0;
3641     for(i=0; i<64; i++){
3642         int one= 36;
3643         int qns=4;
3644         int w;
3645
3646         w= FFABS(weight[i]) + qns*one;
3647         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3648
3649         weight[i] = w;
3650 //        w=weight[i] = (63*qns + (w/2)) / w;
3651
3652         assert(w>0);
3653         assert(w<(1<<6));
3654         sum += w*w;
3655     }
3656     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3657 #ifdef REFINE_STATS
3658 {START_TIMER
3659 #endif
3660     run=0;
3661     rle_index=0;
3662     for(i=start_i; i<=last_non_zero; i++){
3663         int j= perm_scantable[i];
3664         const int level= block[j];
3665         int coeff;
3666
3667         if(level){
3668             if(level<0) coeff= qmul*level - qadd;
3669             else        coeff= qmul*level + qadd;
3670             run_tab[rle_index++]=run;
3671             run=0;
3672
3673             s->dsp.add_8x8basis(rem, basis[j], coeff);
3674         }else{
3675             run++;
3676         }
3677     }
3678 #ifdef REFINE_STATS
3679 if(last_non_zero>0){
3680 STOP_TIMER("init rem[]")
3681 }
3682 }
3683
3684 {START_TIMER
3685 #endif
3686     for(;;){
3687         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3688         int best_coeff=0;
3689         int best_change=0;
3690         int run2, best_unquant_change=0, analyze_gradient;
3691 #ifdef REFINE_STATS
3692 {START_TIMER
3693 #endif
3694         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
3695
3696         if(analyze_gradient){
3697 #ifdef REFINE_STATS
3698 {START_TIMER
3699 #endif
3700             for(i=0; i<64; i++){
3701                 int w= weight[i];
3702
3703                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3704             }
3705 #ifdef REFINE_STATS
3706 STOP_TIMER("rem*w*w")}
3707 {START_TIMER
3708 #endif
3709             s->dsp.fdct(d1);
3710 #ifdef REFINE_STATS
3711 STOP_TIMER("dct")}
3712 #endif
3713         }
3714
3715         if(start_i){
3716             const int level= block[0];
3717             int change, old_coeff;
3718
3719             assert(s->mb_intra);
3720
3721             old_coeff= q*level;
3722
3723             for(change=-1; change<=1; change+=2){
3724                 int new_level= level + change;
3725                 int score, new_coeff;
3726
3727                 new_coeff= q*new_level;
3728                 if(new_coeff >= 2048 || new_coeff < 0)
3729                     continue;
3730
3731                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3732                 if(score<best_score){
3733                     best_score= score;
3734                     best_coeff= 0;
3735                     best_change= change;
3736                     best_unquant_change= new_coeff - old_coeff;
3737                 }
3738             }
3739         }
3740
3741         run=0;
3742         rle_index=0;
3743         run2= run_tab[rle_index++];
3744         prev_level=0;
3745         prev_run=0;
3746
3747         for(i=start_i; i<64; i++){
3748             int j= perm_scantable[i];
3749             const int level= block[j];
3750             int change, old_coeff;
3751
3752             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3753                 break;
3754
3755             if(level){
3756                 if(level<0) old_coeff= qmul*level - qadd;
3757                 else        old_coeff= qmul*level + qadd;
3758                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3759             }else{
3760                 old_coeff=0;
3761                 run2--;
3762                 assert(run2>=0 || i >= last_non_zero );
3763             }
3764
3765             for(change=-1; change<=1; change+=2){
3766                 int new_level= level + change;
3767                 int score, new_coeff, unquant_change;
3768
3769                 score=0;
3770                 if(s->avctx->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3771                    continue;
3772
3773                 if(new_level){
3774                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3775                     else            new_coeff= qmul*new_level + qadd;
3776                     if(new_coeff >= 2048 || new_coeff <= -2048)
3777                         continue;
3778                     //FIXME check for overflow
3779
3780                     if(level){
3781                         if(level < 63 && level > -63){
3782                             if(i < last_non_zero)
3783                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3784                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3785                             else
3786                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3787                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3788                         }
3789                     }else{
3790                         assert(FFABS(new_level)==1);
3791
3792                         if(analyze_gradient){
3793                             int g= d1[ scantable[i] ];
3794                             if(g && (g^new_level) >= 0)
3795                                 continue;
3796                         }
3797
3798                         if(i < last_non_zero){
3799                             int next_i= i + run2 + 1;
3800                             int next_level= block[ perm_scantable[next_i] ] + 64;
3801
3802                             if(next_level&(~127))
3803                                 next_level= 0;
3804
3805                             if(next_i < last_non_zero)
3806                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3807                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3808                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3809                             else
3810                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3811                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3812                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3813                         }else{
3814                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3815                             if(prev_level){
3816                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3817                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3818                             }
3819                         }
3820                     }
3821                 }else{
3822                     new_coeff=0;
3823                     assert(FFABS(level)==1);
3824
3825                     if(i < last_non_zero){
3826                         int next_i= i + run2 + 1;
3827                         int next_level= block[ perm_scantable[next_i] ] + 64;
3828
3829                         if(next_level&(~127))
3830                             next_level= 0;
3831
3832                         if(next_i < last_non_zero)
3833                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3834                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3835                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3836                         else
3837                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3838                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3839                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3840                     }else{
3841                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3842                         if(prev_level){
3843                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3844                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3845                         }
3846                     }
3847                 }
3848
3849                 score *= lambda;
3850
3851                 unquant_change= new_coeff - old_coeff;
3852                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3853
3854                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3855                 if(score<best_score){
3856                     best_score= score;
3857                     best_coeff= i;
3858                     best_change= change;
3859                     best_unquant_change= unquant_change;
3860                 }
3861             }
3862             if(level){
3863                 prev_level= level + 64;
3864                 if(prev_level&(~127))
3865                     prev_level= 0;
3866                 prev_run= run;
3867                 run=0;
3868             }else{
3869                 run++;
3870             }
3871         }
3872 #ifdef REFINE_STATS
3873 STOP_TIMER("iterative step")}
3874 #endif
3875
3876         if(best_change){
3877             int j= perm_scantable[ best_coeff ];
3878
3879             block[j] += best_change;
3880
3881             if(best_coeff > last_non_zero){
3882                 last_non_zero= best_coeff;
3883                 assert(block[j]);
3884 #ifdef REFINE_STATS
3885 after_last++;
3886 #endif
3887             }else{
3888 #ifdef REFINE_STATS
3889 if(block[j]){
3890     if(block[j] - best_change){
3891         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3892             raise++;
3893         }else{
3894             lower++;
3895         }
3896     }else{
3897         from_zero++;
3898     }
3899 }else{
3900     to_zero++;
3901 }
3902 #endif
3903                 for(; last_non_zero>=start_i; last_non_zero--){
3904                     if(block[perm_scantable[last_non_zero]])
3905                         break;
3906                 }
3907             }
3908 #ifdef REFINE_STATS
3909 count++;
3910 if(256*256*256*64 % count == 0){
3911     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
3912 }
3913 #endif
3914             run=0;
3915             rle_index=0;
3916             for(i=start_i; i<=last_non_zero; i++){
3917                 int j= perm_scantable[i];
3918                 const int level= block[j];
3919
3920                  if(level){
3921                      run_tab[rle_index++]=run;
3922                      run=0;
3923                  }else{
3924                      run++;
3925                  }
3926             }
3927
3928             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
3929         }else{
3930             break;
3931         }
3932     }
3933 #ifdef REFINE_STATS
3934 if(last_non_zero>0){
3935 STOP_TIMER("iterative search")
3936 }
3937 }
3938 #endif
3939
3940     return last_non_zero;
3941 }
3942
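/* Default (non-trellis) quantizer: forward DCT, optional denoising, then biased
 * division by the quantization matrix with thresholding of small coefficients;
 * returns the index of the last nonzero coefficient. */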
3943 int dct_quantize_c(MpegEncContext *s,
3944                         DCTELEM *block, int n,
3945                         int qscale, int *overflow)
3946 {
3947     int i, j, level, last_non_zero, q, start_i;
3948     const int *qmat;
3949     const uint8_t *scantable= s->intra_scantable.scantable;
3950     int bias;
3951     int max=0;
3952     unsigned int threshold1, threshold2;
3953
3954     s->dsp.fdct (block);
3955
3956     if(s->dct_error_sum)
3957         s->denoise_dct(s, block);
3958
3959     if (s->mb_intra) {
3960         if (!s->h263_aic) {
3961             if (n < 4)
3962                 q = s->y_dc_scale;
3963             else
3964                 q = s->c_dc_scale;
3965             q = q << 3;
3966         } else
3967             /* For AIC we skip quant/dequant of INTRADC */
3968             q = 1 << 3;
3969
3970         /* note: block[0] is assumed to be positive */
3971         block[0] = (block[0] + (q >> 1)) / q;
3972         start_i = 1;
3973         last_non_zero = 0;
3974         qmat = s->q_intra_matrix[qscale];
3975         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
3976     } else {
3977         start_i = 0;
3978         last_non_zero = -1;
3979         qmat = s->q_inter_matrix[qscale];
3980         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
3981     }
3982     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3983     threshold2= (threshold1<<1);
3984     for(i=63;i>=start_i;i--) {
3985         j = scantable[i];
3986         level = block[j] * qmat[j];
3987
3988         if(((unsigned)(level+threshold1))>threshold2){
3989             last_non_zero = i;
3990             break;
3991         }else{
3992             block[j]=0;
3993         }
3994     }
3995     for(i=start_i; i<=last_non_zero; i++) {
3996         j = scantable[i];
3997         level = block[j] * qmat[j];
3998
3999 //        if(   bias+level >= (1<<QMAT_SHIFT)
4000 //           || bias-level >= (1<<QMAT_SHIFT)){
4001         if(((unsigned)(level+threshold1))>threshold2){
4002             if(level>0){
4003                 level= (bias + level)>>QMAT_SHIFT;
4004                 block[j]= level;
4005             }else{
4006                 level= (bias - level)>>QMAT_SHIFT;
4007                 block[j]= -level;
4008             }
4009             max |=level;
4010         }else{
4011             block[j]=0;
4012         }
4013     }
4014     *overflow= s->max_qcoeff < max; //overflow might have happened
4015
4016     /* we need this permutation so that the IDCT gets the coefficients in the right order; only the nonzero elements are permuted */
4017     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4018         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4019
4020     return last_non_zero;
4021 }
4022
4023 #define OFFSET(x) offsetof(MpegEncContext, x)
4024 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4025 static const AVOption h263_options[] = {
4026     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4027     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4028     { NULL },
4029 };
4030
4031 static const AVClass h263_class = {
4032     .class_name = "H.263 encoder",
4033     .item_name  = av_default_item_name,
4034     .option     = h263_options,
4035     .version    = LIBAVUTIL_VERSION_INT,
4036 };
4037
4038 AVCodec ff_h263_encoder = {
4039     .name           = "h263",
4040     .type           = AVMEDIA_TYPE_VIDEO,
4041     .id             = CODEC_ID_H263,
4042     .priv_data_size = sizeof(MpegEncContext),
4043     .init           = MPV_encode_init,
4044     .encode         = MPV_encode_picture,
4045     .close          = MPV_encode_end,
4046     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4047     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4048     .priv_class     = &h263_class,
4049 };
4050
4051 static const AVOption h263p_options[] = {
4052     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4053     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4054     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4055     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4056     { NULL },
4057 };
4058 static const AVClass h263p_class = {
4059     .class_name = "H.263p encoder",
4060     .item_name  = av_default_item_name,
4061     .option     = h263p_options,
4062     .version    = LIBAVUTIL_VERSION_INT,
4063 };
4064
4065 AVCodec ff_h263p_encoder = {
4066     .name           = "h263p",
4067     .type           = AVMEDIA_TYPE_VIDEO,
4068     .id             = CODEC_ID_H263P,
4069     .priv_data_size = sizeof(MpegEncContext),
4070     .init           = MPV_encode_init,
4071     .encode         = MPV_encode_picture,
4072     .close          = MPV_encode_end,
4073     .capabilities = CODEC_CAP_SLICE_THREADS,
4074     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4075     .long_name= NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4076     .priv_class     = &h263p_class,
4077 };
4078
4079 AVCodec ff_msmpeg4v2_encoder = {
4080     .name           = "msmpeg4v2",
4081     .type           = AVMEDIA_TYPE_VIDEO,
4082     .id             = CODEC_ID_MSMPEG4V2,
4083     .priv_data_size = sizeof(MpegEncContext),
4084     .init           = MPV_encode_init,
4085     .encode         = MPV_encode_picture,
4086     .close          = MPV_encode_end,
4087     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4088     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4089 };
4090
4091 AVCodec ff_msmpeg4v3_encoder = {
4092     .name           = "msmpeg4",
4093     .type           = AVMEDIA_TYPE_VIDEO,
4094     .id             = CODEC_ID_MSMPEG4V3,
4095     .priv_data_size = sizeof(MpegEncContext),
4096     .init           = MPV_encode_init,
4097     .encode         = MPV_encode_picture,
4098     .close          = MPV_encode_end,
4099     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4100     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4101 };
4102
4103 AVCodec ff_wmv1_encoder = {
4104     .name           = "wmv1",
4105     .type           = AVMEDIA_TYPE_VIDEO,
4106     .id             = CODEC_ID_WMV1,
4107     .priv_data_size = sizeof(MpegEncContext),
4108     .init           = MPV_encode_init,
4109     .encode         = MPV_encode_picture,
4110     .close          = MPV_encode_end,
4111     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4112     .long_name= NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4113 };