]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
8svx: fix memleak in iff-fibonacci fate test.
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "mpegvideo_common.h"
37 #include "h263.h"
38 #include "mjpegenc.h"
39 #include "msmpeg4.h"
40 #include "faandct.h"
41 #include "thread.h"
42 #include "aandcttab.h"
43 #include "flv.h"
44 #include "mpeg4video.h"
45 #include "internal.h"
46 #include <limits.h>
47 #include "sp5x.h"
48
49 //#undef NDEBUG
50 //#include <assert.h>
51
52 static int encode_picture(MpegEncContext *s, int picture_number);
53 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
54 static int sse_mb(MpegEncContext *s);
55 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
56 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
57
58 /* enable all paranoid tests for rounding, overflows, etc... */
59 //#define PARANOID
60
61 //#define DEBUG
62
63 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
64 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
65
66 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
67                        uint16_t (*qmat16)[2][64],
68                        const uint16_t *quant_matrix,
69                        int bias, int qmin, int qmax, int intra)
70 {
71     int qscale;
72     int shift = 0;
73
74     for (qscale = qmin; qscale <= qmax; qscale++) {
75         int i;
76         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
77             dsp->fdct == ff_jpeg_fdct_islow_10
78 #ifdef FAAN_POSTSCALE
79             || dsp->fdct == ff_faandct
80 #endif
81             ) {
82             for (i = 0; i < 64; i++) {
83                 const int j = dsp->idct_permutation[i];
84                 /* 16 <= qscale * quant_matrix[i] <= 7905
85                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
86                  *             19952 <=              x  <= 249205026
87                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
88                  *           3444240 >= (1 << 36) / (x) >= 275 */
89
90                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
91                                         (qscale * quant_matrix[j]));
92             }
93         } else if (dsp->fdct == fdct_ifast
94 #ifndef FAAN_POSTSCALE
95                    || dsp->fdct == ff_faandct
96 #endif
97                    ) {
98             for (i = 0; i < 64; i++) {
99                 const int j = dsp->idct_permutation[i];
100                 /* 16 <= qscale * quant_matrix[i] <= 7905
101                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
102                  *             19952 <=              x  <= 249205026
103                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
104                  *           3444240 >= (1 << 36) / (x) >= 275 */
105
106                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
107                                         (ff_aanscales[i] * qscale * quant_matrix[j]));
108             }
109         } else {
110             for (i = 0; i < 64; i++) {
111                 const int j = dsp->idct_permutation[i];
112                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
113                  * Assume x = qscale * quant_matrix[i]
114                  * So             16 <=              x  <= 7905
115                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
116                  * so          32768 >= (1 << 19) / (x) >= 67 */
117                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
118                                         (qscale * quant_matrix[j]));
119                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
120                 //                    (qscale * quant_matrix[i]);
121                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
122                                        (qscale * quant_matrix[j]);
123
124                 if (qmat16[qscale][0][i] == 0 ||
125                     qmat16[qscale][0][i] == 128 * 256)
126                     qmat16[qscale][0][i] = 128 * 256 - 1;
127                 qmat16[qscale][1][i] =
128                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
129                                 qmat16[qscale][0][i]);
130             }
131         }
132
133         for (i = intra; i < 64; i++) {
134             int64_t max = 8191;
135             if (dsp->fdct == fdct_ifast
136 #ifndef FAAN_POSTSCALE
137                 || dsp->fdct == ff_faandct
138 #endif
139                ) {
140                 max = (8191LL * ff_aanscales[i]) >> 14;
141             }
142             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
143                 shift++;
144             }
145         }
146     }
147     if (shift) {
148         av_log(NULL, AV_LOG_INFO,
149                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
150                QMAT_SHIFT - shift);
151     }
152 }
153
154 static inline void update_qscale(MpegEncContext *s)
155 {
156     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
157                 (FF_LAMBDA_SHIFT + 7);
158     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
159
160     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
161                  FF_LAMBDA_SHIFT;
162 }
163
164 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
165 {
166     int i;
167
168     if (matrix) {
169         put_bits(pb, 1, 1);
170         for (i = 0; i < 64; i++) {
171             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
172         }
173     } else
174         put_bits(pb, 1, 0);
175 }
176
177 /**
178  * init s->current_picture.qscale_table from s->lambda_table
179  */
180 void ff_init_qscale_tab(MpegEncContext *s)
181 {
182     int8_t * const qscale_table = s->current_picture.f.qscale_table;
183     int i;
184
185     for (i = 0; i < s->mb_num; i++) {
186         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
187         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
188         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
189                                                   s->avctx->qmax);
190     }
191 }
192
193 static void copy_picture_attributes(MpegEncContext *s,
194                                     AVFrame *dst,
195                                     AVFrame *src)
196 {
197     int i;
198
199     dst->pict_type              = src->pict_type;
200     dst->quality                = src->quality;
201     dst->coded_picture_number   = src->coded_picture_number;
202     dst->display_picture_number = src->display_picture_number;
203     //dst->reference              = src->reference;
204     dst->pts                    = src->pts;
205     dst->interlaced_frame       = src->interlaced_frame;
206     dst->top_field_first        = src->top_field_first;
207
208     if (s->avctx->me_threshold) {
209         if (!src->motion_val[0])
210             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
211         if (!src->mb_type)
212             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
213         if (!src->ref_index[0])
214             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
215         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
216             av_log(s->avctx, AV_LOG_ERROR,
217                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
218                    src->motion_subsample_log2, dst->motion_subsample_log2);
219
220         memcpy(dst->mb_type, src->mb_type,
221                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
222
223         for (i = 0; i < 2; i++) {
224             int stride = ((16 * s->mb_width ) >>
225                           src->motion_subsample_log2) + 1;
226             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
227
228             if (src->motion_val[i] &&
229                 src->motion_val[i] != dst->motion_val[i]) {
230                 memcpy(dst->motion_val[i], src->motion_val[i],
231                        2 * stride * height * sizeof(int16_t));
232             }
233             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
234                 memcpy(dst->ref_index[i], src->ref_index[i],
235                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
236             }
237         }
238     }
239 }
240
241 static void update_duplicate_context_after_me(MpegEncContext *dst,
242                                               MpegEncContext *src)
243 {
244 #define COPY(a) dst->a= src->a
245     COPY(pict_type);
246     COPY(current_picture);
247     COPY(f_code);
248     COPY(b_code);
249     COPY(qscale);
250     COPY(lambda);
251     COPY(lambda2);
252     COPY(picture_in_gop_number);
253     COPY(gop_picture_number);
254     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
255     COPY(progressive_frame);    // FIXME don't set in encode_header
256     COPY(partitioned_frame);    // FIXME don't set in encode_header
257 #undef COPY
258 }
259
260 /**
261  * Set the given MpegEncContext to defaults for encoding.
262  * the changed fields will not depend upon the prior state of the MpegEncContext.
263  */
264 static void MPV_encode_defaults(MpegEncContext *s)
265 {
266     int i;
267     MPV_common_defaults(s);
268
269     for (i = -16; i < 16; i++) {
270         default_fcode_tab[i + MAX_MV] = 1;
271     }
272     s->me.mv_penalty = default_mv_penalty;
273     s->fcode_tab     = default_fcode_tab;
274 }
275
276 /* init video encoder */
277 av_cold int MPV_encode_init(AVCodecContext *avctx)
278 {
279     MpegEncContext *s = avctx->priv_data;
280     int i;
281     int chroma_h_shift, chroma_v_shift;
282
283     MPV_encode_defaults(s);
284
285     switch (avctx->codec_id) {
286     case CODEC_ID_MPEG2VIDEO:
287         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
288             avctx->pix_fmt != PIX_FMT_YUV422P) {
289             av_log(avctx, AV_LOG_ERROR,
290                    "only YUV420 and YUV422 are supported\n");
291             return -1;
292         }
293         break;
294     case CODEC_ID_LJPEG:
295         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
296             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
297             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
298             avctx->pix_fmt != PIX_FMT_BGRA     &&
299             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
300               avctx->pix_fmt != PIX_FMT_YUV422P &&
301               avctx->pix_fmt != PIX_FMT_YUV444P) ||
302              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
303             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
304             return -1;
305         }
306         break;
307     case CODEC_ID_MJPEG:
308     case CODEC_ID_AMV:
309         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
310             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
311             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
312               avctx->pix_fmt != PIX_FMT_YUV422P) ||
313              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
314             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
315             return -1;
316         }
317         break;
318     default:
319         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
320             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
321             return -1;
322         }
323     }
324
325     switch (avctx->pix_fmt) {
326     case PIX_FMT_YUVJ422P:
327     case PIX_FMT_YUV422P:
328         s->chroma_format = CHROMA_422;
329         break;
330     case PIX_FMT_YUVJ420P:
331     case PIX_FMT_YUV420P:
332     default:
333         s->chroma_format = CHROMA_420;
334         break;
335     }
336
337     s->bit_rate = avctx->bit_rate;
338     s->width    = avctx->width;
339     s->height   = avctx->height;
340     if (avctx->gop_size > 600 &&
341         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
342         av_log(avctx, AV_LOG_ERROR,
343                "Warning keyframe interval too large! reducing it ...\n");
344         avctx->gop_size = 600;
345     }
346     s->gop_size     = avctx->gop_size;
347     s->avctx        = avctx;
348     s->flags        = avctx->flags;
349     s->flags2       = avctx->flags2;
350     s->max_b_frames = avctx->max_b_frames;
351     s->codec_id     = avctx->codec->id;
352     s->luma_elim_threshold   = avctx->luma_elim_threshold;
353     s->chroma_elim_threshold = avctx->chroma_elim_threshold;
354     s->strict_std_compliance = avctx->strict_std_compliance;
355 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
356     if (avctx->flags & CODEC_FLAG_PART)
357         s->data_partitioning = 1;
358 #endif
359     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
360     s->mpeg_quant         = avctx->mpeg_quant;
361     s->rtp_mode           = !!avctx->rtp_payload_size;
362     s->intra_dc_precision = avctx->intra_dc_precision;
363     s->user_specified_pts = AV_NOPTS_VALUE;
364
365     if (s->gop_size <= 1) {
366         s->intra_only = 1;
367         s->gop_size   = 12;
368     } else {
369         s->intra_only = 0;
370     }
371
372     s->me_method = avctx->me_method;
373
374     /* Fixed QSCALE */
375     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
376
377     s->adaptive_quant = (s->avctx->lumi_masking ||
378                          s->avctx->dark_masking ||
379                          s->avctx->temporal_cplx_masking ||
380                          s->avctx->spatial_cplx_masking  ||
381                          s->avctx->p_masking      ||
382                          s->avctx->border_masking ||
383                          (s->flags & CODEC_FLAG_QP_RD)) &&
384                         !s->fixed_qscale;
385
386     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
387 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
388     s->alternate_scan   = !!(s->flags  & CODEC_FLAG_ALT_SCAN);
389     s->intra_vlc_format = !!(s->flags2 & CODEC_FLAG2_INTRA_VLC);
390     s->q_scale_type     = !!(s->flags2 & CODEC_FLAG2_NON_LINEAR_QUANT);
391     s->obmc             = !!(s->flags  & CODEC_FLAG_OBMC);
392 #endif
393
394     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
395         av_log(avctx, AV_LOG_ERROR,
396                "a vbv buffer size is needed, "
397                "for encoding with a maximum bitrate\n");
398         return -1;
399     }
400
401     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
402         av_log(avctx, AV_LOG_INFO,
403                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
404     }
405
406     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
407         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
408         return -1;
409     }
410
411     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
412         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
413         return -1;
414     }
415
416     if (avctx->rc_max_rate &&
417         avctx->rc_max_rate == avctx->bit_rate &&
418         avctx->rc_max_rate != avctx->rc_min_rate) {
419         av_log(avctx, AV_LOG_INFO,
420                "impossible bitrate constraints, this will fail\n");
421     }
422
423     if (avctx->rc_buffer_size &&
424         avctx->bit_rate * (int64_t)avctx->time_base.num >
425             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
426         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
427         return -1;
428     }
429
430     if (!s->fixed_qscale &&
431         avctx->bit_rate * av_q2d(avctx->time_base) >
432             avctx->bit_rate_tolerance) {
433         av_log(avctx, AV_LOG_ERROR,
434                "bitrate tolerance too small for bitrate\n");
435         return -1;
436     }
437
438     if (s->avctx->rc_max_rate &&
439         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
440         (s->codec_id == CODEC_ID_MPEG1VIDEO ||
441          s->codec_id == CODEC_ID_MPEG2VIDEO) &&
442         90000LL * (avctx->rc_buffer_size - 1) >
443             s->avctx->rc_max_rate * 0xFFFFLL) {
444         av_log(avctx, AV_LOG_INFO,
445                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
446                "specified vbv buffer is too large for the given bitrate!\n");
447     }
448
449     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != CODEC_ID_MPEG4 &&
450         s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P &&
451         s->codec_id != CODEC_ID_FLV1) {
452         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
453         return -1;
454     }
455
456     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
457         av_log(avctx, AV_LOG_ERROR,
458                "OBMC is only supported with simple mb decision\n");
459         return -1;
460     }
461
462 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
463     if (s->obmc && s->codec_id != CODEC_ID_H263 &&
464         s->codec_id != CODEC_ID_H263P) {
465         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
466         return -1;
467     }
468 #endif
469
470     if (s->quarter_sample && s->codec_id != CODEC_ID_MPEG4) {
471         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
472         return -1;
473     }
474
475 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
476     if (s->data_partitioning && s->codec_id != CODEC_ID_MPEG4) {
477         av_log(avctx, AV_LOG_ERROR,
478                "data partitioning not supported by codec\n");
479         return -1;
480     }
481 #endif
482
483     if (s->max_b_frames                    &&
484         s->codec_id != CODEC_ID_MPEG4      &&
485         s->codec_id != CODEC_ID_MPEG1VIDEO &&
486         s->codec_id != CODEC_ID_MPEG2VIDEO) {
487         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
488         return -1;
489     }
490
491     if ((s->codec_id == CODEC_ID_MPEG4 ||
492          s->codec_id == CODEC_ID_H263  ||
493          s->codec_id == CODEC_ID_H263P) &&
494         (avctx->sample_aspect_ratio.num > 255 ||
495          avctx->sample_aspect_ratio.den > 255)) {
496         av_log(avctx, AV_LOG_WARNING,
497                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
498                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
499         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
500                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
501     }
502
503     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME
504 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
505                     | CODEC_FLAG_ALT_SCAN
506 #endif
507         )) &&
508         s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO) {
509         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
510         return -1;
511     }
512
513     // FIXME mpeg2 uses that too
514     if (s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4) {
515         av_log(avctx, AV_LOG_ERROR,
516                "mpeg2 style quantization not supported by codec\n");
517         return -1;
518     }
519
520     if ((s->flags & CODEC_FLAG_CBP_RD) && !avctx->trellis) {
521         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
522         return -1;
523     }
524
525     if ((s->flags & CODEC_FLAG_QP_RD) &&
526         s->avctx->mb_decision != FF_MB_DECISION_RD) {
527         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
528         return -1;
529     }
530
531     if (s->avctx->scenechange_threshold < 1000000000 &&
532         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
533         av_log(avctx, AV_LOG_ERROR,
534                "closed gop with scene change detection are not supported yet, "
535                "set threshold to 1000000000\n");
536         return -1;
537     }
538
539 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
540     if ((s->flags2 & CODEC_FLAG2_INTRA_VLC) &&
541         s->codec_id != CODEC_ID_MPEG2VIDEO) {
542         av_log(avctx, AV_LOG_ERROR,
543                "intra vlc table not supported by codec\n");
544         return -1;
545     }
546 #endif
547
548     if (s->flags & CODEC_FLAG_LOW_DELAY) {
549         if (s->codec_id != CODEC_ID_MPEG2VIDEO) {
550             av_log(avctx, AV_LOG_ERROR,
551                   "low delay forcing is only available for mpeg2\n");
552             return -1;
553         }
554         if (s->max_b_frames != 0) {
555             av_log(avctx, AV_LOG_ERROR,
556                    "b frames cannot be used with low delay\n");
557             return -1;
558         }
559     }
560
561     if (s->q_scale_type == 1) {
562 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
563         if (s->codec_id != CODEC_ID_MPEG2VIDEO) {
564             av_log(avctx, AV_LOG_ERROR,
565                    "non linear quant is only available for mpeg2\n");
566             return -1;
567         }
568 #endif
569         if (avctx->qmax > 12) {
570             av_log(avctx, AV_LOG_ERROR,
571                    "non linear quant only supports qmax <= 12 currently\n");
572             return -1;
573         }
574     }
575
576     if (s->avctx->thread_count > 1         &&
577         s->codec_id != CODEC_ID_MPEG4      &&
578         s->codec_id != CODEC_ID_MPEG1VIDEO &&
579         s->codec_id != CODEC_ID_MPEG2VIDEO &&
580         (s->codec_id != CODEC_ID_H263P
581 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
582          || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT)
583 #endif
584          )) {
585         av_log(avctx, AV_LOG_ERROR,
586                "multi threaded encoding not supported by codec\n");
587         return -1;
588     }
589
590     if (s->avctx->thread_count < 1) {
591         av_log(avctx, AV_LOG_ERROR,
592                "automatic thread number detection not supported by codec, "
593                "patch welcome\n");
594         return -1;
595     }
596
597     if (s->avctx->thread_count > 1)
598         s->rtp_mode = 1;
599
600     if (!avctx->time_base.den || !avctx->time_base.num) {
601         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
602         return -1;
603     }
604
605     i = (INT_MAX / 2 + 128) >> 8;
606     if (avctx->me_threshold >= i) {
607         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
608                i - 1);
609         return -1;
610     }
611     if (avctx->mb_threshold >= i) {
612         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
613                i - 1);
614         return -1;
615     }
616
617     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
618         av_log(avctx, AV_LOG_INFO,
619                "notice: b_frame_strategy only affects the first pass\n");
620         avctx->b_frame_strategy = 0;
621     }
622
623     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
624     if (i > 1) {
625         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
626         avctx->time_base.den /= i;
627         avctx->time_base.num /= i;
628         //return -1;
629     }
630
631     if (s->mpeg_quant || s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || s->codec_id == CODEC_ID_MJPEG || s->codec_id==CODEC_ID_AMV) {
632         // (a + x * 3 / 8) / x
633         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
634         s->inter_quant_bias = 0;
635     } else {
636         s->intra_quant_bias = 0;
637         // (a - x / 4) / x
638         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
639     }
640
641     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
642         s->intra_quant_bias = avctx->intra_quant_bias;
643     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
644         s->inter_quant_bias = avctx->inter_quant_bias;
645
646     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
647
648     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
649                                   &chroma_v_shift);
650
651     if (avctx->codec_id == CODEC_ID_MPEG4 &&
652         s->avctx->time_base.den > (1 << 16) - 1) {
653         av_log(avctx, AV_LOG_ERROR,
654                "timebase %d/%d not supported by MPEG 4 standard, "
655                "the maximum admitted value for the timebase denominator "
656                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
657                (1 << 16) - 1);
658         return -1;
659     }
660     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
661
662     switch (avctx->codec->id) {
663     case CODEC_ID_MPEG1VIDEO:
664         s->out_format = FMT_MPEG1;
665         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
666         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
667         break;
668     case CODEC_ID_MPEG2VIDEO:
669         s->out_format = FMT_MPEG1;
670         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
671         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
672         s->rtp_mode   = 1;
673         break;
674     case CODEC_ID_LJPEG:
675     case CODEC_ID_MJPEG:
676     case CODEC_ID_AMV:
677         s->out_format = FMT_MJPEG;
678         s->intra_only = 1; /* force intra only for jpeg */
679         if (avctx->codec->id == CODEC_ID_LJPEG && avctx->pix_fmt   == PIX_FMT_BGRA) {
680             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
681             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
682             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
683         } else {
684             s->mjpeg_vsample[0] = 2;
685             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
686             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
687             s->mjpeg_hsample[0] = 2;
688             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
689             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
690         }
691         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
692             ff_mjpeg_encode_init(s) < 0)
693             return -1;
694         avctx->delay = 0;
695         s->low_delay = 1;
696         break;
697     case CODEC_ID_H261:
698         if (!CONFIG_H261_ENCODER)
699             return -1;
700         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
701             av_log(avctx, AV_LOG_ERROR,
702                    "The specified picture size of %dx%d is not valid for the "
703                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
704                     s->width, s->height);
705             return -1;
706         }
707         s->out_format = FMT_H261;
708         avctx->delay  = 0;
709         s->low_delay  = 1;
710         break;
711     case CODEC_ID_H263:
712         if (!CONFIG_H263_ENCODER)
713             return -1;
714         if (ff_match_2uint16(h263_format, FF_ARRAY_ELEMS(h263_format),
715                              s->width, s->height) == 8) {
716             av_log(avctx, AV_LOG_ERROR,
717                    "The specified picture size of %dx%d is not valid for "
718                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
719                    "352x288, 704x576, and 1408x1152. "
720                    "Try H.263+.\n", s->width, s->height);
721             return -1;
722         }
723         s->out_format = FMT_H263;
724         avctx->delay  = 0;
725         s->low_delay  = 1;
726         break;
727     case CODEC_ID_H263P:
728         s->out_format = FMT_H263;
729         s->h263_plus  = 1;
730         /* Fx */
731 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
732         if (avctx->flags & CODEC_FLAG_H263P_UMV)
733             s->umvplus = 1;
734         if (avctx->flags & CODEC_FLAG_H263P_AIV)
735             s->alt_inter_vlc = 1;
736         if (avctx->flags & CODEC_FLAG_H263P_SLICE_STRUCT)
737             s->h263_slice_structured = 1;
738 #endif
739         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
740         s->modified_quant  = s->h263_aic;
741         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
742         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
743
744         /* /Fx */
745         /* These are just to be sure */
746         avctx->delay = 0;
747         s->low_delay = 1;
748         break;
749     case CODEC_ID_FLV1:
750         s->out_format      = FMT_H263;
751         s->h263_flv        = 2; /* format = 1; 11-bit codes */
752         s->unrestricted_mv = 1;
753         s->rtp_mode  = 0; /* don't allow GOB */
754         avctx->delay = 0;
755         s->low_delay = 1;
756         break;
757     case CODEC_ID_RV10:
758         s->out_format = FMT_H263;
759         avctx->delay  = 0;
760         s->low_delay  = 1;
761         break;
762     case CODEC_ID_RV20:
763         s->out_format      = FMT_H263;
764         avctx->delay       = 0;
765         s->low_delay       = 1;
766         s->modified_quant  = 1;
767         s->h263_aic        = 1;
768         s->h263_plus       = 1;
769         s->loop_filter     = 1;
770         s->unrestricted_mv = 0;
771         break;
772     case CODEC_ID_MPEG4:
773         s->out_format      = FMT_H263;
774         s->h263_pred       = 1;
775         s->unrestricted_mv = 1;
776         s->low_delay       = s->max_b_frames ? 0 : 1;
777         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
778         break;
779     case CODEC_ID_MSMPEG4V2:
780         s->out_format      = FMT_H263;
781         s->h263_pred       = 1;
782         s->unrestricted_mv = 1;
783         s->msmpeg4_version = 2;
784         avctx->delay       = 0;
785         s->low_delay       = 1;
786         break;
787     case CODEC_ID_MSMPEG4V3:
788         s->out_format        = FMT_H263;
789         s->h263_pred         = 1;
790         s->unrestricted_mv   = 1;
791         s->msmpeg4_version   = 3;
792         s->flipflop_rounding = 1;
793         avctx->delay         = 0;
794         s->low_delay         = 1;
795         break;
796     case CODEC_ID_WMV1:
797         s->out_format        = FMT_H263;
798         s->h263_pred         = 1;
799         s->unrestricted_mv   = 1;
800         s->msmpeg4_version   = 4;
801         s->flipflop_rounding = 1;
802         avctx->delay         = 0;
803         s->low_delay         = 1;
804         break;
805     case CODEC_ID_WMV2:
806         s->out_format        = FMT_H263;
807         s->h263_pred         = 1;
808         s->unrestricted_mv   = 1;
809         s->msmpeg4_version   = 5;
810         s->flipflop_rounding = 1;
811         avctx->delay         = 0;
812         s->low_delay         = 1;
813         break;
814     default:
815         return -1;
816     }
817
818     avctx->has_b_frames = !s->low_delay;
819
820     s->encoding = 1;
821
822     s->progressive_frame    =
823     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
824                                                 CODEC_FLAG_INTERLACED_ME) ||
825                                 s->alternate_scan);
826
827     /* init */
828     if (MPV_common_init(s) < 0)
829         return -1;
830
831     if (!s->dct_quantize)
832         s->dct_quantize = dct_quantize_c;
833     if (!s->denoise_dct)
834         s->denoise_dct  = denoise_dct_c;
835     s->fast_dct_quantize = s->dct_quantize;
836     if (avctx->trellis)
837         s->dct_quantize  = dct_quantize_trellis_c;
838
839     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
840         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
841
842     s->quant_precision = 5;
843
844     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
845     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
846
847     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
848         ff_h261_encode_init(s);
849     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
850         h263_encode_init(s);
851     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
852         ff_msmpeg4_encode_init(s);
853     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
854         && s->out_format == FMT_MPEG1)
855         ff_mpeg1_encode_init(s);
856
857     /* init q matrix */
858     for (i = 0; i < 64; i++) {
859         int j = s->dsp.idct_permutation[i];
860         if (CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4 &&
861             s->mpeg_quant) {
862             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
863             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
864         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
865             s->intra_matrix[j] =
866             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
867         } else {
868             /* mpeg1/2 */
869             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
870             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
871         }
872         if (s->avctx->intra_matrix)
873             s->intra_matrix[j] = s->avctx->intra_matrix[i];
874         if (s->avctx->inter_matrix)
875             s->inter_matrix[j] = s->avctx->inter_matrix[i];
876     }
877
878     /* precompute matrix */
879     /* for mjpeg, we do include qscale in the matrix */
880     if (s->out_format != FMT_MJPEG) {
881         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
882                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
883                           31, 1);
884         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
885                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
886                           31, 0);
887     }
888
889     if (ff_rate_control_init(s) < 0)
890         return -1;
891
892     return 0;
893 }
894
895 av_cold int MPV_encode_end(AVCodecContext *avctx)
896 {
897     MpegEncContext *s = avctx->priv_data;
898
899     ff_rate_control_uninit(s);
900
901     MPV_common_end(s);
902     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
903         s->out_format == FMT_MJPEG)
904         ff_mjpeg_encode_close(s);
905
906     av_freep(&avctx->extradata);
907
908     return 0;
909 }
910
/**
 * Sum of absolute differences between each pixel of a 16x16 block and a
 * single reference value.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return the accumulated absolute error over the 256 pixels
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
924
925 static int get_intra_count(MpegEncContext *s, uint8_t *src,
926                            uint8_t *ref, int stride)
927 {
928     int x, y, w, h;
929     int acc = 0;
930
931     w = s->width  & ~15;
932     h = s->height & ~15;
933
934     for (y = 0; y < h; y += 16) {
935         for (x = 0; x < w; x += 16) {
936             int offset = x + y * stride;
937             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
938                                      16);
939             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
940             int sae  = get_sae(src + offset, mean, stride);
941
942             acc += sae + 500 < sad;
943         }
944     }
945     return acc;
946 }
947
948
949 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
950 {
951     AVFrame *pic = NULL;
952     int64_t pts;
953     int i;
954     const int encoding_delay = s->max_b_frames;
955     int direct = 1;
956
957     if (pic_arg) {
958         pts = pic_arg->pts;
959         pic_arg->display_picture_number = s->input_picture_number++;
960
961         if (pts != AV_NOPTS_VALUE) {
962             if (s->user_specified_pts != AV_NOPTS_VALUE) {
963                 int64_t time = pts;
964                 int64_t last = s->user_specified_pts;
965
966                 if (time <= last) {
967                     av_log(s->avctx, AV_LOG_ERROR,
968                            "Error, Invalid timestamp=%"PRId64", "
969                            "last=%"PRId64"\n", pts, s->user_specified_pts);
970                     return -1;
971                 }
972             }
973             s->user_specified_pts = pts;
974         } else {
975             if (s->user_specified_pts != AV_NOPTS_VALUE) {
976                 s->user_specified_pts =
977                 pts = s->user_specified_pts + 1;
978                 av_log(s->avctx, AV_LOG_INFO,
979                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
980                        pts);
981             } else {
982                 pts = pic_arg->display_picture_number;
983             }
984         }
985     }
986
987   if (pic_arg) {
988     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
989         direct = 0;
990     if (pic_arg->linesize[0] != s->linesize)
991         direct = 0;
992     if (pic_arg->linesize[1] != s->uvlinesize)
993         direct = 0;
994     if (pic_arg->linesize[2] != s->uvlinesize)
995         direct = 0;
996
997     //av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0],
998     //       pic_arg->linesize[1], s->linesize, s->uvlinesize);
999
1000     if (direct) {
1001         i = ff_find_unused_picture(s, 1);
1002         if (i < 0)
1003             return i;
1004
1005         pic = (AVFrame *) &s->picture[i];
1006         pic->reference = 3;
1007
1008         for (i = 0; i < 4; i++) {
1009             pic->data[i]     = pic_arg->data[i];
1010             pic->linesize[i] = pic_arg->linesize[i];
1011         }
1012         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
1013             return -1;
1014         }
1015     } else {
1016         i = ff_find_unused_picture(s, 0);
1017         if (i < 0)
1018             return i;
1019
1020         pic = (AVFrame *) &s->picture[i];
1021         pic->reference = 3;
1022
1023         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
1024             return -1;
1025         }
1026
1027         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1028             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1029             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1030             // empty
1031         } else {
1032             int h_chroma_shift, v_chroma_shift;
1033             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
1034                                           &v_chroma_shift);
1035
1036             for (i = 0; i < 3; i++) {
1037                 int src_stride = pic_arg->linesize[i];
1038                 int dst_stride = i ? s->uvlinesize : s->linesize;
1039                 int h_shift = i ? h_chroma_shift : 0;
1040                 int v_shift = i ? v_chroma_shift : 0;
1041                 int w = s->width  >> h_shift;
1042                 int h = s->height >> v_shift;
1043                 uint8_t *src = pic_arg->data[i];
1044                 uint8_t *dst = pic->data[i];
1045
1046                 if(s->codec_id == CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)){
1047                     h= ((s->height+15)/16*16)>>v_shift;
1048                 }
1049
1050                 if (!s->avctx->rc_buffer_size)
1051                     dst += INPLACE_OFFSET;
1052
1053                 if (src_stride == dst_stride)
1054                     memcpy(dst, src, src_stride * h);
1055                 else {
1056                     while (h--) {
1057                         memcpy(dst, src, w);
1058                         dst += dst_stride;
1059                         src += src_stride;
1060                     }
1061                 }
1062             }
1063         }
1064     }
1065     copy_picture_attributes(s, pic, pic_arg);
1066     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1067   }
1068
1069     /* shift buffer entries */
1070     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1071         s->input_picture[i - 1] = s->input_picture[i];
1072
1073     s->input_picture[encoding_delay] = (Picture*) pic;
1074
1075     return 0;
1076 }
1077
1078 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1079 {
1080     int x, y, plane;
1081     int score = 0;
1082     int64_t score64 = 0;
1083
1084     for (plane = 0; plane < 3; plane++) {
1085         const int stride = p->f.linesize[plane];
1086         const int bw = plane ? 1 : 2;
1087         for (y = 0; y < s->mb_height * bw; y++) {
1088             for (x = 0; x < s->mb_width * bw; x++) {
1089                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1090                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1091                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1092                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1093
1094                 switch (s->avctx->frame_skip_exp) {
1095                 case 0: score    =  FFMAX(score, v);          break;
1096                 case 1: score   += FFABS(v);                  break;
1097                 case 2: score   += v * v;                     break;
1098                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1099                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1100                 }
1101             }
1102         }
1103     }
1104
1105     if (score)
1106         score64 = score;
1107
1108     if (score64 < s->avctx->frame_skip_threshold)
1109         return 1;
1110     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1111         return 1;
1112     return 0;
1113 }
1114
1115 static int estimate_best_b_count(MpegEncContext *s)
1116 {
1117     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1118     AVCodecContext *c = avcodec_alloc_context3(NULL);
1119     AVFrame input[FF_MAX_B_FRAMES + 2];
1120     const int scale = s->avctx->brd_scale;
1121     int i, j, out_size, p_lambda, b_lambda, lambda2;
1122     int outbuf_size  = s->width * s->height; // FIXME
1123     uint8_t *outbuf  = av_malloc(outbuf_size);
1124     int64_t best_rd  = INT64_MAX;
1125     int best_b_count = -1;
1126
1127     assert(scale >= 0 && scale <= 3);
1128
1129     //emms_c();
1130     //s->next_picture_ptr->quality;
1131     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1132     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1133     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1134     if (!b_lambda) // FIXME we should do this somewhere else
1135         b_lambda = p_lambda;
1136     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1137                FF_LAMBDA_SHIFT;
1138
1139     c->width        = s->width  >> scale;
1140     c->height       = s->height >> scale;
1141     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1142                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1143     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1144     c->mb_decision  = s->avctx->mb_decision;
1145     c->me_cmp       = s->avctx->me_cmp;
1146     c->mb_cmp       = s->avctx->mb_cmp;
1147     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1148     c->pix_fmt      = PIX_FMT_YUV420P;
1149     c->time_base    = s->avctx->time_base;
1150     c->max_b_frames = s->max_b_frames;
1151
1152     if (avcodec_open2(c, codec, NULL) < 0)
1153         return -1;
1154
1155     for (i = 0; i < s->max_b_frames + 2; i++) {
1156         int ysize = c->width * c->height;
1157         int csize = (c->width / 2) * (c->height / 2);
1158         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1159                                                 s->next_picture_ptr;
1160
1161         avcodec_get_frame_defaults(&input[i]);
1162         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1163         input[i].data[1]     = input[i].data[0] + ysize;
1164         input[i].data[2]     = input[i].data[1] + csize;
1165         input[i].linesize[0] = c->width;
1166         input[i].linesize[1] =
1167         input[i].linesize[2] = c->width / 2;
1168
1169         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1170             pre_input = *pre_input_ptr;
1171
1172             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1173                 pre_input.f.data[0] += INPLACE_OFFSET;
1174                 pre_input.f.data[1] += INPLACE_OFFSET;
1175                 pre_input.f.data[2] += INPLACE_OFFSET;
1176             }
1177
1178             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1179                                  pre_input.f.data[0], pre_input.f.linesize[0],
1180                                  c->width,      c->height);
1181             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1182                                  pre_input.f.data[1], pre_input.f.linesize[1],
1183                                  c->width >> 1, c->height >> 1);
1184             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1185                                  pre_input.f.data[2], pre_input.f.linesize[2],
1186                                  c->width >> 1, c->height >> 1);
1187         }
1188     }
1189
1190     for (j = 0; j < s->max_b_frames + 1; j++) {
1191         int64_t rd = 0;
1192
1193         if (!s->input_picture[j])
1194             break;
1195
1196         c->error[0] = c->error[1] = c->error[2] = 0;
1197
1198         input[0].pict_type = AV_PICTURE_TYPE_I;
1199         input[0].quality   = 1 * FF_QP2LAMBDA;
1200         out_size           = avcodec_encode_video(c, outbuf,
1201                                                   outbuf_size, &input[0]);
1202         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1203
1204         for (i = 0; i < s->max_b_frames + 1; i++) {
1205             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1206
1207             input[i + 1].pict_type = is_p ?
1208                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1209             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1210             out_size = avcodec_encode_video(c, outbuf, outbuf_size,
1211                                             &input[i + 1]);
1212             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1213         }
1214
1215         /* get the delayed frames */
1216         while (out_size) {
1217             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
1218             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1219         }
1220
1221         rd += c->error[0] + c->error[1] + c->error[2];
1222
1223         if (rd < best_rd) {
1224             best_rd = rd;
1225             best_b_count = j;
1226         }
1227     }
1228
1229     av_freep(&outbuf);
1230     avcodec_close(c);
1231     av_freep(&c);
1232
1233     for (i = 0; i < s->max_b_frames + 2; i++) {
1234         av_freep(&input[i].data[0]);
1235     }
1236
1237     return best_b_count;
1238 }
1239
1240 static int select_input_picture(MpegEncContext *s)
1241 {
1242     int i;
1243
1244     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1245         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1246     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1247
1248     /* set next picture type & ordering */
1249     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1250         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1251             s->next_picture_ptr == NULL || s->intra_only) {
1252             s->reordered_input_picture[0] = s->input_picture[0];
1253             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1254             s->reordered_input_picture[0]->f.coded_picture_number =
1255                 s->coded_picture_number++;
1256         } else {
1257             int b_frames;
1258
1259             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1260                 if (s->picture_in_gop_number < s->gop_size &&
1261                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1262                     // FIXME check that te gop check above is +-1 correct
1263                     //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n",
1264                     //       s->input_picture[0]->f.data[0],
1265                     //       s->input_picture[0]->pts);
1266
1267                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1268                         for (i = 0; i < 4; i++)
1269                             s->input_picture[0]->f.data[i] = NULL;
1270                         s->input_picture[0]->f.type = 0;
1271                     } else {
1272                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1273                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1274
1275                         s->avctx->release_buffer(s->avctx,
1276                                                  (AVFrame *) s->input_picture[0]);
1277                     }
1278
1279                     emms_c();
1280                     ff_vbv_update(s, 0);
1281
1282                     goto no_output_pic;
1283                 }
1284             }
1285
1286             if (s->flags & CODEC_FLAG_PASS2) {
1287                 for (i = 0; i < s->max_b_frames + 1; i++) {
1288                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1289
1290                     if (pict_num >= s->rc_context.num_entries)
1291                         break;
1292                     if (!s->input_picture[i]) {
1293                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1294                         break;
1295                     }
1296
1297                     s->input_picture[i]->f.pict_type =
1298                         s->rc_context.entry[pict_num].new_pict_type;
1299                 }
1300             }
1301
1302             if (s->avctx->b_frame_strategy == 0) {
1303                 b_frames = s->max_b_frames;
1304                 while (b_frames && !s->input_picture[b_frames])
1305                     b_frames--;
1306             } else if (s->avctx->b_frame_strategy == 1) {
1307                 for (i = 1; i < s->max_b_frames + 1; i++) {
1308                     if (s->input_picture[i] &&
1309                         s->input_picture[i]->b_frame_score == 0) {
1310                         s->input_picture[i]->b_frame_score =
1311                             get_intra_count(s,
1312                                             s->input_picture[i    ]->f.data[0],
1313                                             s->input_picture[i - 1]->f.data[0],
1314                                             s->linesize) + 1;
1315                     }
1316                 }
1317                 for (i = 0; i < s->max_b_frames + 1; i++) {
1318                     if (s->input_picture[i] == NULL ||
1319                         s->input_picture[i]->b_frame_score - 1 >
1320                             s->mb_num / s->avctx->b_sensitivity)
1321                         break;
1322                 }
1323
1324                 b_frames = FFMAX(0, i - 1);
1325
1326                 /* reset scores */
1327                 for (i = 0; i < b_frames + 1; i++) {
1328                     s->input_picture[i]->b_frame_score = 0;
1329                 }
1330             } else if (s->avctx->b_frame_strategy == 2) {
1331                 b_frames = estimate_best_b_count(s);
1332             } else {
1333                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1334                 b_frames = 0;
1335             }
1336
1337             emms_c();
1338             //static int b_count = 0;
1339             //b_count += b_frames;
1340             //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1341
1342             for (i = b_frames - 1; i >= 0; i--) {
1343                 int type = s->input_picture[i]->f.pict_type;
1344                 if (type && type != AV_PICTURE_TYPE_B)
1345                     b_frames = i;
1346             }
1347             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1348                 b_frames == s->max_b_frames) {
1349                 av_log(s->avctx, AV_LOG_ERROR,
1350                        "warning, too many b frames in a row\n");
1351             }
1352
1353             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1354                 if ((s->flags2 & CODEC_FLAG2_STRICT_GOP) &&
1355                     s->gop_size > s->picture_in_gop_number) {
1356                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1357                 } else {
1358                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1359                         b_frames = 0;
1360                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1361                 }
1362             }
1363
1364             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1365                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1366                 b_frames--;
1367
1368             s->reordered_input_picture[0] = s->input_picture[b_frames];
1369             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1370                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1371             s->reordered_input_picture[0]->f.coded_picture_number =
1372                 s->coded_picture_number++;
1373             for (i = 0; i < b_frames; i++) {
1374                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1375                 s->reordered_input_picture[i + 1]->f.pict_type =
1376                     AV_PICTURE_TYPE_B;
1377                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1378                     s->coded_picture_number++;
1379             }
1380         }
1381     }
1382 no_output_pic:
1383     if (s->reordered_input_picture[0]) {
1384         s->reordered_input_picture[0]->f.reference =
1385            s->reordered_input_picture[0]->f.pict_type !=
1386                AV_PICTURE_TYPE_B ? 3 : 0;
1387
1388         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1389
1390         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1391             s->avctx->rc_buffer_size) {
1392             // input is a shared pix, so we can't modifiy it -> alloc a new
1393             // one & ensure that the shared one is reuseable
1394
1395             Picture *pic;
1396             int i = ff_find_unused_picture(s, 0);
1397             if (i < 0)
1398                 return i;
1399             pic = &s->picture[i];
1400
1401             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1402             if (ff_alloc_picture(s, pic, 0) < 0) {
1403                 return -1;
1404             }
1405
1406             /* mark us unused / free shared pic */
1407             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1408                 s->avctx->release_buffer(s->avctx,
1409                                          (AVFrame *) s->reordered_input_picture[0]);
1410             for (i = 0; i < 4; i++)
1411                 s->reordered_input_picture[0]->f.data[i] = NULL;
1412             s->reordered_input_picture[0]->f.type = 0;
1413
1414             copy_picture_attributes(s, (AVFrame *) pic,
1415                                     (AVFrame *) s->reordered_input_picture[0]);
1416
1417             s->current_picture_ptr = pic;
1418         } else {
1419             // input is not a shared pix -> reuse buffer for current_pix
1420
1421             assert(s->reordered_input_picture[0]->f.type ==
1422                        FF_BUFFER_TYPE_USER ||
1423                    s->reordered_input_picture[0]->f.type ==
1424                        FF_BUFFER_TYPE_INTERNAL);
1425
1426             s->current_picture_ptr = s->reordered_input_picture[0];
1427             for (i = 0; i < 4; i++) {
1428                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1429             }
1430         }
1431         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1432
1433         s->picture_number = s->new_picture.f.display_picture_number;
1434         //printf("dpn:%d\n", s->picture_number);
1435     } else {
1436         memset(&s->new_picture, 0, sizeof(Picture));
1437     }
1438     return 0;
1439 }
1440
1441 int MPV_encode_picture(AVCodecContext *avctx,
1442                        unsigned char *buf, int buf_size, void *data)
1443 {
1444     MpegEncContext *s = avctx->priv_data;
1445     AVFrame *pic_arg  = data;
1446     int i, stuffing_count;
1447     int context_count = s->slice_context_count;
1448
1449     for (i = 0; i < context_count; i++) {
1450         int start_y = s->thread_context[i]->start_mb_y;
1451         int   end_y = s->thread_context[i]->  end_mb_y;
1452         int h       = s->mb_height;
1453         uint8_t *start = buf + (size_t)(((int64_t) buf_size) * start_y / h);
1454         uint8_t *end   = buf + (size_t)(((int64_t) buf_size) *   end_y / h);
1455
1456         init_put_bits(&s->thread_context[i]->pb, start, end - start);
1457     }
1458
1459     s->picture_in_gop_number++;
1460
1461     if (load_input_picture(s, pic_arg) < 0)
1462         return -1;
1463
1464     if (select_input_picture(s) < 0) {
1465         return -1;
1466     }
1467
1468     /* output? */
1469     if (s->new_picture.f.data[0]) {
1470         s->pict_type = s->new_picture.f.pict_type;
1471         //emms_c();
1472         //printf("qs:%f %f %d\n", s->new_picture.quality,
1473         //       s->current_picture.quality, s->qscale);
1474         MPV_frame_start(s, avctx);
1475 vbv_retry:
1476         if (encode_picture(s, s->picture_number) < 0)
1477             return -1;
1478
1479         avctx->header_bits = s->header_bits;
1480         avctx->mv_bits     = s->mv_bits;
1481         avctx->misc_bits   = s->misc_bits;
1482         avctx->i_tex_bits  = s->i_tex_bits;
1483         avctx->p_tex_bits  = s->p_tex_bits;
1484         avctx->i_count     = s->i_count;
1485         // FIXME f/b_count in avctx
1486         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1487         avctx->skip_count  = s->skip_count;
1488
1489         MPV_frame_end(s);
1490
1491         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1492             ff_mjpeg_encode_picture_trailer(s);
1493
1494         if (avctx->rc_buffer_size) {
1495             RateControlContext *rcc = &s->rc_context;
1496             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1497
1498             if (put_bits_count(&s->pb) > max_size &&
1499                 s->lambda < s->avctx->lmax) {
1500                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1501                                        (s->qscale + 1) / s->qscale);
1502                 if (s->adaptive_quant) {
1503                     int i;
1504                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1505                         s->lambda_table[i] =
1506                             FFMAX(s->lambda_table[i] + 1,
1507                                   s->lambda_table[i] * (s->qscale + 1) /
1508                                   s->qscale);
1509                 }
1510                 s->mb_skipped = 0;        // done in MPV_frame_start()
1511                 // done in encode_picture() so we must undo it
1512                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1513                     if (s->flipflop_rounding          ||
1514                         s->codec_id == CODEC_ID_H263P ||
1515                         s->codec_id == CODEC_ID_MPEG4)
1516                         s->no_rounding ^= 1;
1517                 }
1518                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1519                     s->time_base       = s->last_time_base;
1520                     s->last_non_b_time = s->time - s->pp_time;
1521                 }
1522                 //av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
1523                 for (i = 0; i < context_count; i++) {
1524                     PutBitContext *pb = &s->thread_context[i]->pb;
1525                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1526                 }
1527                 goto vbv_retry;
1528             }
1529
1530             assert(s->avctx->rc_max_rate);
1531         }
1532
1533         if (s->flags & CODEC_FLAG_PASS1)
1534             ff_write_pass1_stats(s);
1535
1536         for (i = 0; i < 4; i++) {
1537             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1538             avctx->error[i] += s->current_picture_ptr->f.error[i];
1539         }
1540
1541         if (s->flags & CODEC_FLAG_PASS1)
1542             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1543                    avctx->i_tex_bits + avctx->p_tex_bits ==
1544                        put_bits_count(&s->pb));
1545         flush_put_bits(&s->pb);
1546         s->frame_bits  = put_bits_count(&s->pb);
1547
1548         stuffing_count = ff_vbv_update(s, s->frame_bits);
1549         if (stuffing_count) {
1550             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1551                     stuffing_count + 50) {
1552                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1553                 return -1;
1554             }
1555
1556             switch (s->codec_id) {
1557             case CODEC_ID_MPEG1VIDEO:
1558             case CODEC_ID_MPEG2VIDEO:
1559                 while (stuffing_count--) {
1560                     put_bits(&s->pb, 8, 0);
1561                 }
1562             break;
1563             case CODEC_ID_MPEG4:
1564                 put_bits(&s->pb, 16, 0);
1565                 put_bits(&s->pb, 16, 0x1C3);
1566                 stuffing_count -= 4;
1567                 while (stuffing_count--) {
1568                     put_bits(&s->pb, 8, 0xFF);
1569                 }
1570             break;
1571             default:
1572                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1573             }
1574             flush_put_bits(&s->pb);
1575             s->frame_bits  = put_bits_count(&s->pb);
1576         }
1577
1578         /* update mpeg1/2 vbv_delay for CBR */
1579         if (s->avctx->rc_max_rate                          &&
1580             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1581             s->out_format == FMT_MPEG1                     &&
1582             90000LL * (avctx->rc_buffer_size - 1) <=
1583                 s->avctx->rc_max_rate * 0xFFFFLL) {
1584             int vbv_delay, min_delay;
1585             double inbits  = s->avctx->rc_max_rate *
1586                              av_q2d(s->avctx->time_base);
1587             int    minbits = s->frame_bits - 8 *
1588                              (s->vbv_delay_ptr - s->pb.buf - 1);
1589             double bits    = s->rc_context.buffer_index + minbits - inbits;
1590
1591             if (bits < 0)
1592                 av_log(s->avctx, AV_LOG_ERROR,
1593                        "Internal error, negative bits\n");
1594
1595             assert(s->repeat_first_field == 0);
1596
1597             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1598             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1599                         s->avctx->rc_max_rate;
1600
1601             vbv_delay = FFMAX(vbv_delay, min_delay);
1602
1603             assert(vbv_delay < 0xFFFF);
1604
1605             s->vbv_delay_ptr[0] &= 0xF8;
1606             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1607             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1608             s->vbv_delay_ptr[2] &= 0x07;
1609             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1610             avctx->vbv_delay     = vbv_delay * 300;
1611         }
1612         s->total_bits     += s->frame_bits;
1613         avctx->frame_bits  = s->frame_bits;
1614     } else {
1615         assert((put_bits_ptr(&s->pb) == s->pb.buf));
1616         s->frame_bits = 0;
1617     }
1618     assert((s->frame_bits & 7) == 0);
1619
1620     return s->frame_bits / 8;
1621 }
1622
1623 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1624                                                 int n, int threshold)
1625 {
1626     static const char tab[64] = {
1627         3, 2, 2, 1, 1, 1, 1, 1,
1628         1, 1, 1, 1, 1, 1, 1, 1,
1629         1, 1, 1, 1, 1, 1, 1, 1,
1630         0, 0, 0, 0, 0, 0, 0, 0,
1631         0, 0, 0, 0, 0, 0, 0, 0,
1632         0, 0, 0, 0, 0, 0, 0, 0,
1633         0, 0, 0, 0, 0, 0, 0, 0,
1634         0, 0, 0, 0, 0, 0, 0, 0
1635     };
1636     int score = 0;
1637     int run = 0;
1638     int i;
1639     DCTELEM *block = s->block[n];
1640     const int last_index = s->block_last_index[n];
1641     int skip_dc;
1642
1643     if (threshold < 0) {
1644         skip_dc = 0;
1645         threshold = -threshold;
1646     } else
1647         skip_dc = 1;
1648
1649     /* Are all we could set to zero already zero? */
1650     if (last_index <= skip_dc - 1)
1651         return;
1652
1653     for (i = 0; i <= last_index; i++) {
1654         const int j = s->intra_scantable.permutated[i];
1655         const int level = FFABS(block[j]);
1656         if (level == 1) {
1657             if (skip_dc && i == 0)
1658                 continue;
1659             score += tab[run];
1660             run = 0;
1661         } else if (level > 1) {
1662             return;
1663         } else {
1664             run++;
1665         }
1666     }
1667     if (score >= threshold)
1668         return;
1669     for (i = skip_dc; i <= last_index; i++) {
1670         const int j = s->intra_scantable.permutated[i];
1671         block[j] = 0;
1672     }
1673     if (block[0])
1674         s->block_last_index[n] = 0;
1675     else
1676         s->block_last_index[n] = -1;
1677 }
1678
1679 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1680                                int last_index)
1681 {
1682     int i;
1683     const int maxlevel = s->max_qcoeff;
1684     const int minlevel = s->min_qcoeff;
1685     int overflow = 0;
1686
1687     if (s->mb_intra) {
1688         i = 1; // skip clipping of intra dc
1689     } else
1690         i = 0;
1691
1692     for (; i <= last_index; i++) {
1693         const int j = s->intra_scantable.permutated[i];
1694         int level = block[j];
1695
1696         if (level > maxlevel) {
1697             level = maxlevel;
1698             overflow++;
1699         } else if (level < minlevel) {
1700             level = minlevel;
1701             overflow++;
1702         }
1703
1704         block[j] = level;
1705     }
1706
1707     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1708         av_log(s->avctx, AV_LOG_INFO,
1709                "warning, clipping %d dct coefficients to %d..%d\n",
1710                overflow, minlevel, maxlevel);
1711 }
1712
/**
 * Compute a per-pixel weight for an 8x8 block of samples.
 *
 * For every position the (up to) 3x3 neighbourhood, clipped to the 8x8
 * block, is examined; with count samples, their sum and their sum of
 * squares, the weight is 36 * ff_sqrt(count * sqr - sum * sum) / count.
 *
 * @param weight output, 64 entries stored row by row (x + 8 * y)
 * @param ptr    top-left sample of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int px, py;
    // FIXME optimize
    for (py = 0; py < 8; py++) {
        const int top    = FFMAX(py - 1, 0);
        const int bottom = FFMIN(8, py + 2);

        for (px = 0; px < 8; px++) {
            const int left  = FFMAX(px - 1, 0);
            const int right = FFMIN(8, px + 2);
            int total    = 0;
            int total_sq = 0;
            int samples  = 0;
            int nx, ny;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *row = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = row[nx];

                    total    += v;
                    total_sq += v * v;
                    samples++;
                }
            }
            weight[px + 8 * py] =
                (36 * ff_sqrt(samples * total_sq - total * total)) / samples;
        }
    }
}
1736
1737 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1738                                                 int motion_x, int motion_y,
1739                                                 int mb_block_height,
1740                                                 int mb_block_count)
1741 {
1742     int16_t weight[8][64];
1743     DCTELEM orig[8][64];
1744     const int mb_x = s->mb_x;
1745     const int mb_y = s->mb_y;
1746     int i;
1747     int skip_dct[8];
1748     int dct_offset = s->linesize * 8; // default for progressive frames
1749     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1750     int wrap_y, wrap_c;
1751
1752     for (i = 0; i < mb_block_count; i++)
1753         skip_dct[i] = s->skipdct;
1754
1755     if (s->adaptive_quant) {
1756         const int last_qp = s->qscale;
1757         const int mb_xy = mb_x + mb_y * s->mb_stride;
1758
1759         s->lambda = s->lambda_table[mb_xy];
1760         update_qscale(s);
1761
1762         if (!(s->flags & CODEC_FLAG_QP_RD)) {
1763             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1764             s->dquant = s->qscale - last_qp;
1765
1766             if (s->out_format == FMT_H263) {
1767                 s->dquant = av_clip(s->dquant, -2, 2);
1768
1769                 if (s->codec_id == CODEC_ID_MPEG4) {
1770                     if (!s->mb_intra) {
1771                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1772                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1773                                 s->dquant = 0;
1774                         }
1775                         if (s->mv_type == MV_TYPE_8X8)
1776                             s->dquant = 0;
1777                     }
1778                 }
1779             }
1780         }
1781         ff_set_qscale(s, last_qp + s->dquant);
1782     } else if (s->flags & CODEC_FLAG_QP_RD)
1783         ff_set_qscale(s, s->qscale + s->dquant);
1784
1785     wrap_y = s->linesize;
1786     wrap_c = s->uvlinesize;
1787     ptr_y  = s->new_picture.f.data[0] +
1788              (mb_y * 16 * wrap_y)              + mb_x * 16;
1789     ptr_cb = s->new_picture.f.data[1] +
1790              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1791     ptr_cr = s->new_picture.f.data[2] +
1792              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1793
1794     if((mb_x*16+16 > s->width || mb_y*16+16 > s->height) && s->codec_id != CODEC_ID_AMV){
1795         uint8_t *ebuf = s->edge_emu_buffer + 32;
1796         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1797                                 mb_y * 16, s->width, s->height);
1798         ptr_y = ebuf;
1799         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1800                                 mb_block_height, mb_x * 8, mb_y * 8,
1801                                 s->width >> 1, s->height >> 1);
1802         ptr_cb = ebuf + 18 * wrap_y;
1803         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1804                                 mb_block_height, mb_x * 8, mb_y * 8,
1805                                 s->width >> 1, s->height >> 1);
1806         ptr_cr = ebuf + 18 * wrap_y + 8;
1807     }
1808
1809     if (s->mb_intra) {
1810         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1811             int progressive_score, interlaced_score;
1812
1813             s->interlaced_dct = 0;
1814             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1815                                                     NULL, wrap_y, 8) +
1816                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1817                                                     NULL, wrap_y, 8) - 400;
1818
1819             if (progressive_score > 0) {
1820                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1821                                                        NULL, wrap_y * 2, 8) +
1822                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1823                                                        NULL, wrap_y * 2, 8);
1824                 if (progressive_score > interlaced_score) {
1825                     s->interlaced_dct = 1;
1826
1827                     dct_offset = wrap_y;
1828                     wrap_y <<= 1;
1829                     if (s->chroma_format == CHROMA_422)
1830                         wrap_c <<= 1;
1831                 }
1832             }
1833         }
1834
1835         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1836         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1837         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1838         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1839
1840         if (s->flags & CODEC_FLAG_GRAY) {
1841             skip_dct[4] = 1;
1842             skip_dct[5] = 1;
1843         } else {
1844             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1845             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1846             if (!s->chroma_y_shift) { /* 422 */
1847                 s->dsp.get_pixels(s->block[6],
1848                                   ptr_cb + (dct_offset >> 1), wrap_c);
1849                 s->dsp.get_pixels(s->block[7],
1850                                   ptr_cr + (dct_offset >> 1), wrap_c);
1851             }
1852         }
1853     } else {
1854         op_pixels_func (*op_pix)[4];
1855         qpel_mc_func (*op_qpix)[16];
1856         uint8_t *dest_y, *dest_cb, *dest_cr;
1857
1858         dest_y  = s->dest[0];
1859         dest_cb = s->dest[1];
1860         dest_cr = s->dest[2];
1861
1862         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1863             op_pix  = s->dsp.put_pixels_tab;
1864             op_qpix = s->dsp.put_qpel_pixels_tab;
1865         } else {
1866             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1867             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1868         }
1869
1870         if (s->mv_dir & MV_DIR_FORWARD) {
1871             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f.data,
1872                        op_pix, op_qpix);
1873             op_pix  = s->dsp.avg_pixels_tab;
1874             op_qpix = s->dsp.avg_qpel_pixels_tab;
1875         }
1876         if (s->mv_dir & MV_DIR_BACKWARD) {
1877             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f.data,
1878                        op_pix, op_qpix);
1879         }
1880
1881         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1882             int progressive_score, interlaced_score;
1883
1884             s->interlaced_dct = 0;
1885             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1886                                                     ptr_y,              wrap_y,
1887                                                     8) +
1888                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1889                                                     ptr_y + wrap_y * 8, wrap_y,
1890                                                     8) - 400;
1891
1892             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1893                 progressive_score -= 400;
1894
1895             if (progressive_score > 0) {
1896                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1897                                                        ptr_y,
1898                                                        wrap_y * 2, 8) +
1899                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1900                                                        ptr_y + wrap_y,
1901                                                        wrap_y * 2, 8);
1902
1903                 if (progressive_score > interlaced_score) {
1904                     s->interlaced_dct = 1;
1905
1906                     dct_offset = wrap_y;
1907                     wrap_y <<= 1;
1908                     if (s->chroma_format == CHROMA_422)
1909                         wrap_c <<= 1;
1910                 }
1911             }
1912         }
1913
1914         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1915         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1916         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1917                            dest_y + dct_offset, wrap_y);
1918         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1919                            dest_y + dct_offset + 8, wrap_y);
1920
1921         if (s->flags & CODEC_FLAG_GRAY) {
1922             skip_dct[4] = 1;
1923             skip_dct[5] = 1;
1924         } else {
1925             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1926             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1927             if (!s->chroma_y_shift) { /* 422 */
1928                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1929                                    dest_cb + (dct_offset >> 1), wrap_c);
1930                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1931                                    dest_cr + (dct_offset >> 1), wrap_c);
1932             }
1933         }
1934         /* pre quantization */
1935         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1936                 2 * s->qscale * s->qscale) {
1937             // FIXME optimize
1938             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1939                               wrap_y, 8) < 20 * s->qscale)
1940                 skip_dct[0] = 1;
1941             if (s->dsp.sad[1](NULL, ptr_y + 8,
1942                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1943                 skip_dct[1] = 1;
1944             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1945                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1946                 skip_dct[2] = 1;
1947             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1948                               dest_y + dct_offset + 8,
1949                               wrap_y, 8) < 20 * s->qscale)
1950                 skip_dct[3] = 1;
1951             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1952                               wrap_c, 8) < 20 * s->qscale)
1953                 skip_dct[4] = 1;
1954             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1955                               wrap_c, 8) < 20 * s->qscale)
1956                 skip_dct[5] = 1;
1957             if (!s->chroma_y_shift) { /* 422 */
1958                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1959                                   dest_cb + (dct_offset >> 1),
1960                                   wrap_c, 8) < 20 * s->qscale)
1961                     skip_dct[6] = 1;
1962                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1963                                   dest_cr + (dct_offset >> 1),
1964                                   wrap_c, 8) < 20 * s->qscale)
1965                     skip_dct[7] = 1;
1966             }
1967         }
1968     }
1969
1970     if (s->avctx->quantizer_noise_shaping) {
1971         if (!skip_dct[0])
1972             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1973         if (!skip_dct[1])
1974             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1975         if (!skip_dct[2])
1976             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1977         if (!skip_dct[3])
1978             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1979         if (!skip_dct[4])
1980             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1981         if (!skip_dct[5])
1982             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1983         if (!s->chroma_y_shift) { /* 422 */
1984             if (!skip_dct[6])
1985                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1986                                   wrap_c);
1987             if (!skip_dct[7])
1988                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1989                                   wrap_c);
1990         }
1991         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
1992     }
1993
1994     /* DCT & quantize */
1995     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1996     {
1997         for (i = 0; i < mb_block_count; i++) {
1998             if (!skip_dct[i]) {
1999                 int overflow;
2000                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2001                 // FIXME we could decide to change to quantizer instead of
2002                 // clipping
2003                 // JS: I don't think that would be a good idea it could lower
2004                 //     quality instead of improve it. Just INTRADC clipping
2005                 //     deserves changes in quantizer
2006                 if (overflow)
2007                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2008             } else
2009                 s->block_last_index[i] = -1;
2010         }
2011         if (s->avctx->quantizer_noise_shaping) {
2012             for (i = 0; i < mb_block_count; i++) {
2013                 if (!skip_dct[i]) {
2014                     s->block_last_index[i] =
2015                         dct_quantize_refine(s, s->block[i], weight[i],
2016                                             orig[i], i, s->qscale);
2017                 }
2018             }
2019         }
2020
2021         if (s->luma_elim_threshold && !s->mb_intra)
2022             for (i = 0; i < 4; i++)
2023                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2024         if (s->chroma_elim_threshold && !s->mb_intra)
2025             for (i = 4; i < mb_block_count; i++)
2026                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2027
2028         if (s->flags & CODEC_FLAG_CBP_RD) {
2029             for (i = 0; i < mb_block_count; i++) {
2030                 if (s->block_last_index[i] == -1)
2031                     s->coded_score[i] = INT_MAX / 256;
2032             }
2033         }
2034     }
2035
2036     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2037         s->block_last_index[4] =
2038         s->block_last_index[5] = 0;
2039         s->block[4][0] =
2040         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2041     }
2042
2043     // non c quantize code returns incorrect block_last_index FIXME
2044     if (s->alternate_scan && s->dct_quantize != dct_quantize_c) {
2045         for (i = 0; i < mb_block_count; i++) {
2046             int j;
2047             if (s->block_last_index[i] > 0) {
2048                 for (j = 63; j > 0; j--) {
2049                     if (s->block[i][s->intra_scantable.permutated[j]])
2050                         break;
2051                 }
2052                 s->block_last_index[i] = j;
2053             }
2054         }
2055     }
2056
2057     /* huffman encode */
2058     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2059     case CODEC_ID_MPEG1VIDEO:
2060     case CODEC_ID_MPEG2VIDEO:
2061         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2062             mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2063         break;
2064     case CODEC_ID_MPEG4:
2065         if (CONFIG_MPEG4_ENCODER)
2066             mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2067         break;
2068     case CODEC_ID_MSMPEG4V2:
2069     case CODEC_ID_MSMPEG4V3:
2070     case CODEC_ID_WMV1:
2071         if (CONFIG_MSMPEG4_ENCODER)
2072             msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2073         break;
2074     case CODEC_ID_WMV2:
2075         if (CONFIG_WMV2_ENCODER)
2076             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2077         break;
2078     case CODEC_ID_H261:
2079         if (CONFIG_H261_ENCODER)
2080             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2081         break;
2082     case CODEC_ID_H263:
2083     case CODEC_ID_H263P:
2084     case CODEC_ID_FLV1:
2085     case CODEC_ID_RV10:
2086     case CODEC_ID_RV20:
2087         if (CONFIG_H263_ENCODER)
2088             h263_encode_mb(s, s->block, motion_x, motion_y);
2089         break;
2090     case CODEC_ID_MJPEG:
2091     case CODEC_ID_AMV:
2092         if (CONFIG_MJPEG_ENCODER)
2093             ff_mjpeg_encode_mb(s, s->block);
2094         break;
2095     default:
2096         assert(0);
2097     }
2098 }
2099
2100 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2101 {
2102     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2103     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2104 }
2105
2106 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2107     int i;
2108
2109     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2110
2111     /* mpeg1 */
2112     d->mb_skip_run= s->mb_skip_run;
2113     for(i=0; i<3; i++)
2114         d->last_dc[i] = s->last_dc[i];
2115
2116     /* statistics */
2117     d->mv_bits= s->mv_bits;
2118     d->i_tex_bits= s->i_tex_bits;
2119     d->p_tex_bits= s->p_tex_bits;
2120     d->i_count= s->i_count;
2121     d->f_count= s->f_count;
2122     d->b_count= s->b_count;
2123     d->skip_count= s->skip_count;
2124     d->misc_bits= s->misc_bits;
2125     d->last_bits= 0;
2126
2127     d->mb_skipped= 0;
2128     d->qscale= s->qscale;
2129     d->dquant= s->dquant;
2130
2131     d->esc3_level_length= s->esc3_level_length;
2132 }
2133
2134 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2135     int i;
2136
2137     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2138     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2139
2140     /* mpeg1 */
2141     d->mb_skip_run= s->mb_skip_run;
2142     for(i=0; i<3; i++)
2143         d->last_dc[i] = s->last_dc[i];
2144
2145     /* statistics */
2146     d->mv_bits= s->mv_bits;
2147     d->i_tex_bits= s->i_tex_bits;
2148     d->p_tex_bits= s->p_tex_bits;
2149     d->i_count= s->i_count;
2150     d->f_count= s->f_count;
2151     d->b_count= s->b_count;
2152     d->skip_count= s->skip_count;
2153     d->misc_bits= s->misc_bits;
2154
2155     d->mb_intra= s->mb_intra;
2156     d->mb_skipped= s->mb_skipped;
2157     d->mv_type= s->mv_type;
2158     d->mv_dir= s->mv_dir;
2159     d->pb= s->pb;
2160     if(s->data_partitioning){
2161         d->pb2= s->pb2;
2162         d->tex_pb= s->tex_pb;
2163     }
2164     d->block= s->block;
2165     for(i=0; i<8; i++)
2166         d->block_last_index[i]= s->block_last_index[i];
2167     d->interlaced_dct= s->interlaced_dct;
2168     d->qscale= s->qscale;
2169
2170     d->esc3_level_length= s->esc3_level_length;
2171 }
2172
2173 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2174                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2175                            int *dmin, int *next_block, int motion_x, int motion_y)
2176 {
2177     int score;
2178     uint8_t *dest_backup[3];
2179
2180     copy_context_before_encode(s, backup, type);
2181
2182     s->block= s->blocks[*next_block];
2183     s->pb= pb[*next_block];
2184     if(s->data_partitioning){
2185         s->pb2   = pb2   [*next_block];
2186         s->tex_pb= tex_pb[*next_block];
2187     }
2188
2189     if(*next_block){
2190         memcpy(dest_backup, s->dest, sizeof(s->dest));
2191         s->dest[0] = s->rd_scratchpad;
2192         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2193         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2194         assert(s->linesize >= 32); //FIXME
2195     }
2196
2197     encode_mb(s, motion_x, motion_y);
2198
2199     score= put_bits_count(&s->pb);
2200     if(s->data_partitioning){
2201         score+= put_bits_count(&s->pb2);
2202         score+= put_bits_count(&s->tex_pb);
2203     }
2204
2205     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2206         MPV_decode_mb(s, s->block);
2207
2208         score *= s->lambda2;
2209         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2210     }
2211
2212     if(*next_block){
2213         memcpy(s->dest, dest_backup, sizeof(s->dest));
2214     }
2215
2216     if(score<*dmin){
2217         *dmin= score;
2218         *next_block^=1;
2219
2220         copy_context_after_encode(best, s, type);
2221     }
2222 }
2223
2224 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2225     uint32_t *sq = ff_squareTbl + 256;
2226     int acc=0;
2227     int x,y;
2228
2229     if(w==16 && h==16)
2230         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2231     else if(w==8 && h==8)
2232         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2233
2234     for(y=0; y<h; y++){
2235         for(x=0; x<w; x++){
2236             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2237         }
2238     }
2239
2240     assert(acc>=0);
2241
2242     return acc;
2243 }
2244
2245 static int sse_mb(MpegEncContext *s){
2246     int w= 16;
2247     int h= 16;
2248
2249     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2250     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2251
2252     if(w==16 && h==16)
2253       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2254         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2255                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2256                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2257       }else{
2258         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2259                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2260                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2261       }
2262     else
2263         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2264                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2265                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2266 }
2267
2268 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2269     MpegEncContext *s= *(void**)arg;
2270
2271
2272     s->me.pre_pass=1;
2273     s->me.dia_size= s->avctx->pre_dia_size;
2274     s->first_slice_line=1;
2275     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2276         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2277             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2278         }
2279         s->first_slice_line=0;
2280     }
2281
2282     s->me.pre_pass=0;
2283
2284     return 0;
2285 }
2286
2287 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2288     MpegEncContext *s= *(void**)arg;
2289
2290     ff_check_alignment();
2291
2292     s->me.dia_size= s->avctx->dia_size;
2293     s->first_slice_line=1;
2294     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2295         s->mb_x=0; //for block init below
2296         ff_init_block_index(s);
2297         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2298             s->block_index[0]+=2;
2299             s->block_index[1]+=2;
2300             s->block_index[2]+=2;
2301             s->block_index[3]+=2;
2302
2303             /* compute motion vector & mb_type and store in context */
2304             if(s->pict_type==AV_PICTURE_TYPE_B)
2305                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2306             else
2307                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2308         }
2309         s->first_slice_line=0;
2310     }
2311     return 0;
2312 }
2313
2314 static int mb_var_thread(AVCodecContext *c, void *arg){
2315     MpegEncContext *s= *(void**)arg;
2316     int mb_x, mb_y;
2317
2318     ff_check_alignment();
2319
2320     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2321         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2322             int xx = mb_x * 16;
2323             int yy = mb_y * 16;
2324             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2325             int varc;
2326             int sum = s->dsp.pix_sum(pix, s->linesize);
2327
2328             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2329
2330             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2331             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2332             s->me.mb_var_sum_temp    += varc;
2333         }
2334     }
2335     return 0;
2336 }
2337
2338 static void write_slice_end(MpegEncContext *s){
2339     if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4){
2340         if(s->partitioned_frame){
2341             ff_mpeg4_merge_partitions(s);
2342         }
2343
2344         ff_mpeg4_stuffing(&s->pb);
2345     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2346         ff_mjpeg_encode_stuffing(&s->pb);
2347     }
2348
2349     avpriv_align_put_bits(&s->pb);
2350     flush_put_bits(&s->pb);
2351
2352     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2353         s->misc_bits+= get_bits_diff(s);
2354 }
2355
2356 static int encode_thread(AVCodecContext *c, void *arg){
2357     MpegEncContext *s= *(void**)arg;
2358     int mb_x, mb_y, pdif = 0;
2359     int chr_h= 16>>s->chroma_y_shift;
2360     int i, j;
2361     MpegEncContext best_s, backup_s;
2362     uint8_t bit_buf[2][MAX_MB_BYTES];
2363     uint8_t bit_buf2[2][MAX_MB_BYTES];
2364     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2365     PutBitContext pb[2], pb2[2], tex_pb[2];
2366 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2367
2368     ff_check_alignment();
2369
2370     for(i=0; i<2; i++){
2371         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2372         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2373         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2374     }
2375
2376     s->last_bits= put_bits_count(&s->pb);
2377     s->mv_bits=0;
2378     s->misc_bits=0;
2379     s->i_tex_bits=0;
2380     s->p_tex_bits=0;
2381     s->i_count=0;
2382     s->f_count=0;
2383     s->b_count=0;
2384     s->skip_count=0;
2385
2386     for(i=0; i<3; i++){
2387         /* init last dc values */
2388         /* note: quant matrix value (8) is implied here */
2389         s->last_dc[i] = 128 << s->intra_dc_precision;
2390
2391         s->current_picture.f.error[i] = 0;
2392     }
2393     if(s->codec_id==CODEC_ID_AMV){
2394         s->last_dc[0] = 128*8/13;
2395         s->last_dc[1] = 128*8/14;
2396         s->last_dc[2] = 128*8/14;
2397     }
2398     s->mb_skip_run = 0;
2399     memset(s->last_mv, 0, sizeof(s->last_mv));
2400
2401     s->last_mv_dir = 0;
2402
2403     switch(s->codec_id){
2404     case CODEC_ID_H263:
2405     case CODEC_ID_H263P:
2406     case CODEC_ID_FLV1:
2407         if (CONFIG_H263_ENCODER)
2408             s->gob_index = ff_h263_get_gob_height(s);
2409         break;
2410     case CODEC_ID_MPEG4:
2411         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2412             ff_mpeg4_init_partitions(s);
2413         break;
2414     }
2415
2416     s->resync_mb_x=0;
2417     s->resync_mb_y=0;
2418     s->first_slice_line = 1;
2419     s->ptr_lastgob = s->pb.buf;
2420     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2421 //    printf("row %d at %X\n", s->mb_y, (int)s);
2422         s->mb_x=0;
2423         s->mb_y= mb_y;
2424
2425         ff_set_qscale(s, s->qscale);
2426         ff_init_block_index(s);
2427
2428         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2429             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2430             int mb_type= s->mb_type[xy];
2431 //            int d;
2432             int dmin= INT_MAX;
2433             int dir;
2434
2435             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2436                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2437                 return -1;
2438             }
2439             if(s->data_partitioning){
2440                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2441                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2442                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2443                     return -1;
2444                 }
2445             }
2446
2447             s->mb_x = mb_x;
2448             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2449             ff_update_block_index(s);
2450
2451             if(CONFIG_H261_ENCODER && s->codec_id == CODEC_ID_H261){
2452                 ff_h261_reorder_mb_index(s);
2453                 xy= s->mb_y*s->mb_stride + s->mb_x;
2454                 mb_type= s->mb_type[xy];
2455             }
2456
2457             /* write gob / video packet header  */
2458             if(s->rtp_mode){
2459                 int current_packet_size, is_gob_start;
2460
2461                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2462
2463                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2464
2465                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2466
2467                 switch(s->codec_id){
2468                 case CODEC_ID_H263:
2469                 case CODEC_ID_H263P:
2470                     if(!s->h263_slice_structured)
2471                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2472                     break;
2473                 case CODEC_ID_MPEG2VIDEO:
2474                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2475                 case CODEC_ID_MPEG1VIDEO:
2476                     if(s->mb_skip_run) is_gob_start=0;
2477                     break;
2478                 }
2479
2480                 if(is_gob_start){
2481                     if(s->start_mb_y != mb_y || mb_x!=0){
2482                         write_slice_end(s);
2483
2484                         if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
2485                             ff_mpeg4_init_partitions(s);
2486                         }
2487                     }
2488
2489                     assert((put_bits_count(&s->pb)&7) == 0);
2490                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2491
2492                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2493                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2494                         int d= 100 / s->avctx->error_rate;
2495                         if(r % d == 0){
2496                             current_packet_size=0;
2497                             s->pb.buf_ptr= s->ptr_lastgob;
2498                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2499                         }
2500                     }
2501
2502                     if (s->avctx->rtp_callback){
2503                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2504                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2505                     }
2506
2507                     switch(s->codec_id){
2508                     case CODEC_ID_MPEG4:
2509                         if (CONFIG_MPEG4_ENCODER) {
2510                             ff_mpeg4_encode_video_packet_header(s);
2511                             ff_mpeg4_clean_buffers(s);
2512                         }
2513                     break;
2514                     case CODEC_ID_MPEG1VIDEO:
2515                     case CODEC_ID_MPEG2VIDEO:
2516                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2517                             ff_mpeg1_encode_slice_header(s);
2518                             ff_mpeg1_clean_buffers(s);
2519                         }
2520                     break;
2521                     case CODEC_ID_H263:
2522                     case CODEC_ID_H263P:
2523                         if (CONFIG_H263_ENCODER)
2524                             h263_encode_gob_header(s, mb_y);
2525                     break;
2526                     }
2527
2528                     if(s->flags&CODEC_FLAG_PASS1){
2529                         int bits= put_bits_count(&s->pb);
2530                         s->misc_bits+= bits - s->last_bits;
2531                         s->last_bits= bits;
2532                     }
2533
2534                     s->ptr_lastgob += current_packet_size;
2535                     s->first_slice_line=1;
2536                     s->resync_mb_x=mb_x;
2537                     s->resync_mb_y=mb_y;
2538                 }
2539             }
2540
2541             if(  (s->resync_mb_x   == s->mb_x)
2542                && s->resync_mb_y+1 == s->mb_y){
2543                 s->first_slice_line=0;
2544             }
2545
2546             s->mb_skipped=0;
2547             s->dquant=0; //only for QP_RD
2548
2549             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
2550                 int next_block=0;
2551                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2552
2553                 copy_context_before_encode(&backup_s, s, -1);
2554                 backup_s.pb= s->pb;
2555                 best_s.data_partitioning= s->data_partitioning;
2556                 best_s.partitioned_frame= s->partitioned_frame;
2557                 if(s->data_partitioning){
2558                     backup_s.pb2= s->pb2;
2559                     backup_s.tex_pb= s->tex_pb;
2560                 }
2561
2562                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2563                     s->mv_dir = MV_DIR_FORWARD;
2564                     s->mv_type = MV_TYPE_16X16;
2565                     s->mb_intra= 0;
2566                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2567                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2568                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2569                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2570                 }
2571                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2572                     s->mv_dir = MV_DIR_FORWARD;
2573                     s->mv_type = MV_TYPE_FIELD;
2574                     s->mb_intra= 0;
2575                     for(i=0; i<2; i++){
2576                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2577                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2578                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2579                     }
2580                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2581                                  &dmin, &next_block, 0, 0);
2582                 }
2583                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2584                     s->mv_dir = MV_DIR_FORWARD;
2585                     s->mv_type = MV_TYPE_16X16;
2586                     s->mb_intra= 0;
2587                     s->mv[0][0][0] = 0;
2588                     s->mv[0][0][1] = 0;
2589                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2590                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2591                 }
2592                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2593                     s->mv_dir = MV_DIR_FORWARD;
2594                     s->mv_type = MV_TYPE_8X8;
2595                     s->mb_intra= 0;
2596                     for(i=0; i<4; i++){
2597                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2598                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2599                     }
2600                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2601                                  &dmin, &next_block, 0, 0);
2602                 }
2603                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2604                     s->mv_dir = MV_DIR_FORWARD;
2605                     s->mv_type = MV_TYPE_16X16;
2606                     s->mb_intra= 0;
2607                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2608                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2609                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2610                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2611                 }
2612                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2613                     s->mv_dir = MV_DIR_BACKWARD;
2614                     s->mv_type = MV_TYPE_16X16;
2615                     s->mb_intra= 0;
2616                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2617                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2618                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2619                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2620                 }
2621                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2622                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2623                     s->mv_type = MV_TYPE_16X16;
2624                     s->mb_intra= 0;
2625                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2626                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2627                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2628                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2629                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2630                                  &dmin, &next_block, 0, 0);
2631                 }
2632                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2633                     s->mv_dir = MV_DIR_FORWARD;
2634                     s->mv_type = MV_TYPE_FIELD;
2635                     s->mb_intra= 0;
2636                     for(i=0; i<2; i++){
2637                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2638                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2639                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2640                     }
2641                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2642                                  &dmin, &next_block, 0, 0);
2643                 }
2644                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2645                     s->mv_dir = MV_DIR_BACKWARD;
2646                     s->mv_type = MV_TYPE_FIELD;
2647                     s->mb_intra= 0;
2648                     for(i=0; i<2; i++){
2649                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2650                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2651                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2652                     }
2653                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2654                                  &dmin, &next_block, 0, 0);
2655                 }
2656                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2657                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2658                     s->mv_type = MV_TYPE_FIELD;
2659                     s->mb_intra= 0;
2660                     for(dir=0; dir<2; dir++){
2661                         for(i=0; i<2; i++){
2662                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2663                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2664                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2665                         }
2666                     }
2667                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2668                                  &dmin, &next_block, 0, 0);
2669                 }
2670                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2671                     s->mv_dir = 0;
2672                     s->mv_type = MV_TYPE_16X16;
2673                     s->mb_intra= 1;
2674                     s->mv[0][0][0] = 0;
2675                     s->mv[0][0][1] = 0;
2676                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2677                                  &dmin, &next_block, 0, 0);
2678                     if(s->h263_pred || s->h263_aic){
2679                         if(best_s.mb_intra)
2680                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2681                         else
2682                             ff_clean_intra_table_entries(s); //old mode?
2683                     }
2684                 }
2685
2686                 if((s->flags & CODEC_FLAG_QP_RD) && dmin < INT_MAX){
2687                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2688                         const int last_qp= backup_s.qscale;
2689                         int qpi, qp, dc[6];
2690                         DCTELEM ac[6][16];
2691                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2692                         static const int dquant_tab[4]={-1,1,-2,2};
2693
2694                         assert(backup_s.dquant == 0);
2695
2696                         //FIXME intra
2697                         s->mv_dir= best_s.mv_dir;
2698                         s->mv_type = MV_TYPE_16X16;
2699                         s->mb_intra= best_s.mb_intra;
2700                         s->mv[0][0][0] = best_s.mv[0][0][0];
2701                         s->mv[0][0][1] = best_s.mv[0][0][1];
2702                         s->mv[1][0][0] = best_s.mv[1][0][0];
2703                         s->mv[1][0][1] = best_s.mv[1][0][1];
2704
2705                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2706                         for(; qpi<4; qpi++){
2707                             int dquant= dquant_tab[qpi];
2708                             qp= last_qp + dquant;
2709                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2710                                 continue;
2711                             backup_s.dquant= dquant;
2712                             if(s->mb_intra && s->dc_val[0]){
2713                                 for(i=0; i<6; i++){
2714                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2715                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2716                                 }
2717                             }
2718
2719                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2720                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2721                             if(best_s.qscale != qp){
2722                                 if(s->mb_intra && s->dc_val[0]){
2723                                     for(i=0; i<6; i++){
2724                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2725                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2726                                     }
2727                                 }
2728                             }
2729                         }
2730                     }
2731                 }
2732                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2733                     int mx= s->b_direct_mv_table[xy][0];
2734                     int my= s->b_direct_mv_table[xy][1];
2735
2736                     backup_s.dquant = 0;
2737                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2738                     s->mb_intra= 0;
2739                     ff_mpeg4_set_direct_mv(s, mx, my);
2740                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2741                                  &dmin, &next_block, mx, my);
2742                 }
2743                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2744                     backup_s.dquant = 0;
2745                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2746                     s->mb_intra= 0;
2747                     ff_mpeg4_set_direct_mv(s, 0, 0);
2748                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2749                                  &dmin, &next_block, 0, 0);
2750                 }
2751                 if(!best_s.mb_intra && s->flags2&CODEC_FLAG2_SKIP_RD){
2752                     int coded=0;
2753                     for(i=0; i<6; i++)
2754                         coded |= s->block_last_index[i];
2755                     if(coded){
2756                         int mx,my;
2757                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2758                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2759                             mx=my=0; //FIXME find the one we actually used
2760                             ff_mpeg4_set_direct_mv(s, mx, my);
2761                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2762                             mx= s->mv[1][0][0];
2763                             my= s->mv[1][0][1];
2764                         }else{
2765                             mx= s->mv[0][0][0];
2766                             my= s->mv[0][0][1];
2767                         }
2768
2769                         s->mv_dir= best_s.mv_dir;
2770                         s->mv_type = best_s.mv_type;
2771                         s->mb_intra= 0;
2772 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2773                         s->mv[0][0][1] = best_s.mv[0][0][1];
2774                         s->mv[1][0][0] = best_s.mv[1][0][0];
2775                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2776                         backup_s.dquant= 0;
2777                         s->skipdct=1;
2778                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2779                                         &dmin, &next_block, mx, my);
2780                         s->skipdct=0;
2781                     }
2782                 }
2783
2784                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2785
2786                 copy_context_after_encode(s, &best_s, -1);
2787
2788                 pb_bits_count= put_bits_count(&s->pb);
2789                 flush_put_bits(&s->pb);
2790                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2791                 s->pb= backup_s.pb;
2792
2793                 if(s->data_partitioning){
2794                     pb2_bits_count= put_bits_count(&s->pb2);
2795                     flush_put_bits(&s->pb2);
2796                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2797                     s->pb2= backup_s.pb2;
2798
2799                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2800                     flush_put_bits(&s->tex_pb);
2801                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2802                     s->tex_pb= backup_s.tex_pb;
2803                 }
2804                 s->last_bits= put_bits_count(&s->pb);
2805
2806                 if (CONFIG_H263_ENCODER &&
2807                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2808                     ff_h263_update_motion_val(s);
2809
2810                 if(next_block==0){ //FIXME 16 vs linesize16
2811                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2812                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2813                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2814                 }
2815
2816                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2817                     MPV_decode_mb(s, s->block);
2818             } else {
2819                 int motion_x = 0, motion_y = 0;
2820                 s->mv_type=MV_TYPE_16X16;
2821                 // only one MB-Type possible
2822
2823                 switch(mb_type){
2824                 case CANDIDATE_MB_TYPE_INTRA:
2825                     s->mv_dir = 0;
2826                     s->mb_intra= 1;
2827                     motion_x= s->mv[0][0][0] = 0;
2828                     motion_y= s->mv[0][0][1] = 0;
2829                     break;
2830                 case CANDIDATE_MB_TYPE_INTER:
2831                     s->mv_dir = MV_DIR_FORWARD;
2832                     s->mb_intra= 0;
2833                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2834                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2835                     break;
2836                 case CANDIDATE_MB_TYPE_INTER_I:
2837                     s->mv_dir = MV_DIR_FORWARD;
2838                     s->mv_type = MV_TYPE_FIELD;
2839                     s->mb_intra= 0;
2840                     for(i=0; i<2; i++){
2841                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2842                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2843                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2844                     }
2845                     break;
2846                 case CANDIDATE_MB_TYPE_INTER4V:
2847                     s->mv_dir = MV_DIR_FORWARD;
2848                     s->mv_type = MV_TYPE_8X8;
2849                     s->mb_intra= 0;
2850                     for(i=0; i<4; i++){
2851                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2852                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2853                     }
2854                     break;
2855                 case CANDIDATE_MB_TYPE_DIRECT:
2856                     if (CONFIG_MPEG4_ENCODER) {
2857                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2858                         s->mb_intra= 0;
2859                         motion_x=s->b_direct_mv_table[xy][0];
2860                         motion_y=s->b_direct_mv_table[xy][1];
2861                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2862                     }
2863                     break;
2864                 case CANDIDATE_MB_TYPE_DIRECT0:
2865                     if (CONFIG_MPEG4_ENCODER) {
2866                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2867                         s->mb_intra= 0;
2868                         ff_mpeg4_set_direct_mv(s, 0, 0);
2869                     }
2870                     break;
2871                 case CANDIDATE_MB_TYPE_BIDIR:
2872                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2873                     s->mb_intra= 0;
2874                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2875                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2876                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2877                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2878                     break;
2879                 case CANDIDATE_MB_TYPE_BACKWARD:
2880                     s->mv_dir = MV_DIR_BACKWARD;
2881                     s->mb_intra= 0;
2882                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2883                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2884                     break;
2885                 case CANDIDATE_MB_TYPE_FORWARD:
2886                     s->mv_dir = MV_DIR_FORWARD;
2887                     s->mb_intra= 0;
2888                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2889                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2890 //                    printf(" %d %d ", motion_x, motion_y);
2891                     break;
2892                 case CANDIDATE_MB_TYPE_FORWARD_I:
2893                     s->mv_dir = MV_DIR_FORWARD;
2894                     s->mv_type = MV_TYPE_FIELD;
2895                     s->mb_intra= 0;
2896                     for(i=0; i<2; i++){
2897                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2898                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2899                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2900                     }
2901                     break;
2902                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2903                     s->mv_dir = MV_DIR_BACKWARD;
2904                     s->mv_type = MV_TYPE_FIELD;
2905                     s->mb_intra= 0;
2906                     for(i=0; i<2; i++){
2907                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2908                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2909                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2910                     }
2911                     break;
2912                 case CANDIDATE_MB_TYPE_BIDIR_I:
2913                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2914                     s->mv_type = MV_TYPE_FIELD;
2915                     s->mb_intra= 0;
2916                     for(dir=0; dir<2; dir++){
2917                         for(i=0; i<2; i++){
2918                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2919                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2920                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2921                         }
2922                     }
2923                     break;
2924                 default:
2925                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2926                 }
2927
2928                 encode_mb(s, motion_x, motion_y);
2929
2930                 // RAL: Update last macroblock type
2931                 s->last_mv_dir = s->mv_dir;
2932
2933                 if (CONFIG_H263_ENCODER &&
2934                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2935                     ff_h263_update_motion_val(s);
2936
2937                 MPV_decode_mb(s, s->block);
2938             }
2939
2940             /* clean the MV table in IPS frames for direct mode in B frames */
2941             if(s->mb_intra /* && I,P,S_TYPE */){
2942                 s->p_mv_table[xy][0]=0;
2943                 s->p_mv_table[xy][1]=0;
2944             }
2945
2946             if(s->flags&CODEC_FLAG_PSNR){
2947                 int w= 16;
2948                 int h= 16;
2949
2950                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2951                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2952
2953                 s->current_picture.f.error[0] += sse(
2954                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2955                     s->dest[0], w, h, s->linesize);
2956                 s->current_picture.f.error[1] += sse(
2957                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2958                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2959                 s->current_picture.f.error[2] += sse(
2960                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2961                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2962             }
2963             if(s->loop_filter){
2964                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2965                     ff_h263_loop_filter(s);
2966             }
2967 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
2968         }
2969     }
2970
2971     //not beautiful here but we must write it before flushing so it has to be here
2972     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
2973         msmpeg4_encode_ext_header(s);
2974
2975     write_slice_end(s);
2976
2977     /* Send the last GOB if RTP */
2978     if (s->avctx->rtp_callback) {
2979         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
2980         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
2981         /* Call the RTP callback to send the last GOB */
2982         emms_c();
2983         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
2984     }
2985
2986     return 0;
2987 }
2988
2989 #define MERGE(field) dst->field += src->field; src->field=0
2990 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
2991     MERGE(me.scene_change_score);
2992     MERGE(me.mc_mb_var_sum_temp);
2993     MERGE(me.mb_var_sum_temp);
2994 }
2995
2996 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
2997     int i;
2998
2999     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3000     MERGE(dct_count[1]);
3001     MERGE(mv_bits);
3002     MERGE(i_tex_bits);
3003     MERGE(p_tex_bits);
3004     MERGE(i_count);
3005     MERGE(f_count);
3006     MERGE(b_count);
3007     MERGE(skip_count);
3008     MERGE(misc_bits);
3009     MERGE(error_count);
3010     MERGE(padding_bug_score);
3011     MERGE(current_picture.f.error[0]);
3012     MERGE(current_picture.f.error[1]);
3013     MERGE(current_picture.f.error[2]);
3014
3015     if(dst->avctx->noise_reduction){
3016         for(i=0; i<64; i++){
3017             MERGE(dct_error_sum[0][i]);
3018             MERGE(dct_error_sum[1][i]);
3019         }
3020     }
3021
3022     assert(put_bits_count(&src->pb) % 8 ==0);
3023     assert(put_bits_count(&dst->pb) % 8 ==0);
3024     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3025     flush_put_bits(&dst->pb);
3026 }
3027
3028 static int estimate_qp(MpegEncContext *s, int dry_run){
3029     if (s->next_lambda){
3030         s->current_picture_ptr->f.quality =
3031         s->current_picture.f.quality = s->next_lambda;
3032         if(!dry_run) s->next_lambda= 0;
3033     } else if (!s->fixed_qscale) {
3034         s->current_picture_ptr->f.quality =
3035         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3036         if (s->current_picture.f.quality < 0)
3037             return -1;
3038     }
3039
3040     if(s->adaptive_quant){
3041         switch(s->codec_id){
3042         case CODEC_ID_MPEG4:
3043             if (CONFIG_MPEG4_ENCODER)
3044                 ff_clean_mpeg4_qscales(s);
3045             break;
3046         case CODEC_ID_H263:
3047         case CODEC_ID_H263P:
3048         case CODEC_ID_FLV1:
3049             if (CONFIG_H263_ENCODER)
3050                 ff_clean_h263_qscales(s);
3051             break;
3052         default:
3053             ff_init_qscale_tab(s);
3054         }
3055
3056         s->lambda= s->lambda_table[0];
3057         //FIXME broken
3058     }else
3059         s->lambda = s->current_picture.f.quality;
3060 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3061     update_qscale(s);
3062     return 0;
3063 }
3064
3065 /* must be called before writing the header */
3066 static void set_frame_distances(MpegEncContext * s){
3067     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3068     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3069
3070     if(s->pict_type==AV_PICTURE_TYPE_B){
3071         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3072         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3073     }else{
3074         s->pp_time= s->time - s->last_non_b_time;
3075         s->last_non_b_time= s->time;
3076         assert(s->picture_number==0 || s->pp_time > 0);
3077     }
3078 }
3079
/**
 * Encode one picture.
 *
 * In order, this function:
 *  - initialises timing, rounding mode and lambda for the picture,
 *  - runs motion estimation (non-I pictures) or, for I pictures without a
 *    fixed qscale, the macroblock variance pass, on all slice contexts,
 *  - may re-type a P-picture as an I-picture on a detected scene change,
 *  - selects f_code / b_code and runs ff_fix_long_mvs() on the MV tables
 *    (unless umvplus is set),
 *  - estimates the final qp, sets up MJPEG / AMV quantisation matrices,
 *  - writes the picture header for the output format,
 *  - runs encode_thread on all slice contexts and merges their results.
 *
 * @param s              main encoder context
 * @param picture_number stored in s->picture_number and passed to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails
 */
3080 static int encode_picture(MpegEncContext *s, int picture_number)
3081 {
3082     int i;
3083     int bits;
3084     int context_count = s->slice_context_count;
3085
3086     s->picture_number = picture_number;
3087
3088     /* Reset the average MB variance */
3089     s->me.mb_var_sum_temp    =
3090     s->me.mc_mb_var_sum_temp = 0;
3091
3092     /* we need to initialize some time vars before we can encode b-frames */
3093     // RAL: Condition added for MPEG1VIDEO
3094     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3095         set_frame_distances(s);
3096     if(CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4)
3097         ff_set_mpeg4_time(s);
3098
3099     s->me.scene_change_score=0;
3100
3101 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3102
         /* Rounding mode: I-pictures set no_rounding from msmpeg4_version;
          * other non-B pictures toggle it when flipflop_rounding is set or
          * the codec is H263P / MPEG4. B-pictures leave it unchanged. */
3103     if(s->pict_type==AV_PICTURE_TYPE_I){
3104         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3105         else                        s->no_rounding=0;
3106     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3107         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
3108             s->no_rounding ^= 1;
3109     }
3110
         /* Second pass: dry-run qp estimate followed by ff_get_2pass_fcode().
          * Otherwise, unless CODEC_FLAG_QSCALE is set, start from the last
          * lambda recorded for this kind of picture. */
3111     if(s->flags & CODEC_FLAG_PASS2){
3112         if (estimate_qp(s,1) < 0)
3113             return -1;
3114         ff_get_2pass_fcode(s);
3115     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3116         if(s->pict_type==AV_PICTURE_TYPE_B)
3117             s->lambda= s->last_lambda_for[s->pict_type];
3118         else
3119             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3120         update_qscale(s);
3121     }
3122
         /* For every codec except AMV the chroma intra quant matrices alias
          * the luma ones; chroma tables that are distinct pointers are freed
          * before being replaced. */
3123     if(s->codec_id != CODEC_ID_AMV){
3124         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3125         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3126         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3127         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3128     }
3129
3130     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* refresh the additional slice contexts from the main context */
3131     for(i=1; i<context_count; i++){
3132         ff_update_duplicate_context(s->thread_context[i], s);
3133     }
3134
3135     if(ff_init_me(s)<0)
3136         return -1;
3137
3138     /* Estimate motion for every MB */
3139     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3140         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3141         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3142         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3143             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3144                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3145             }
3146         }
3147
3148         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3149     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3150         /* I-Frame */
3151         for(i=0; i<s->mb_stride*s->mb_height; i++)
3152             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3153
3154         if(!s->fixed_qscale){
3155             /* finding spatial complexity for I-frame rate control */
3156             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3157         }
3158     }
         /* fold the per-context ME statistics back into the main context */
3159     for(i=1; i<context_count; i++){
3160         merge_context_after_me(s, s->thread_context[i]);
3161     }
3162     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3163     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3164     emms_c();
3165
         /* A P-picture whose scene change score exceeds the threshold is
          * re-typed as an I-picture with all macroblocks marked intra. */
3166     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3167         s->pict_type= AV_PICTURE_TYPE_I;
3168         for(i=0; i<s->mb_stride*s->mb_height; i++)
3169             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3170 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3171     }
3172
         /* Unless umvplus is set, pick f_code (and b_code for B-pictures)
          * from the motion vector tables and run ff_fix_long_mvs() on each
          * table with the chosen code. */
3173     if(!s->umvplus){
3174         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3175             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3176
3177             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3178                 int a,b;
3179                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3180                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3181                 s->f_code= FFMAX3(s->f_code, a, b);
3182             }
3183
3184             ff_fix_long_p_mvs(s);
3185             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3186             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3187                 int j;
3188                 for(i=0; i<2; i++){
3189                     for(j=0; j<2; j++)
3190                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3191                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3192                 }
3193             }
3194         }
3195
3196         if(s->pict_type==AV_PICTURE_TYPE_B){
3197             int a, b;
3198
                 /* f_code covers the forward tables, b_code the backward ones */
3199             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3200             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3201             s->f_code = FFMAX(a, b);
3202
3203             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3204             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3205             s->b_code = FFMAX(a, b);
3206
3207             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3208             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3209             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3210             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3211             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3212                 int dir, j;
3213                 for(dir=0; dir<2; dir++){
3214                     for(i=0; i<2; i++){
3215                         for(j=0; j<2; j++){
3216                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3217                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3218                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3219                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3220                         }
3221                     }
3222                 }
3223             }
3224         }
3225     }
3226
         /* final (non dry-run) quantiser estimate */
3227     if (estimate_qp(s, 0) < 0)
3228         return -1;
3229
3230     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3231         s->qscale= 3; //reduce clipping problems
3232
3233     if (s->out_format == FMT_MJPEG) {
3234         /* for mjpeg, we do include qscale in the matrix */
3235         for(i=1;i<64;i++){
3236             int j= s->dsp.idct_permutation[i];
3237
3238             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3239         }
3240         s->y_dc_scale_table=
3241         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3242         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3243         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3244                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3245         s->qscale= 8;
3246     }
         /* AMV: fixed matrices from sp5x_quant_table rows 10 (luma) and 11
          * (chroma), with constant DC scale tables of 13 and 14. */
3247     if(s->codec_id == CODEC_ID_AMV){
3248         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3249         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3250         for(i=1;i<64;i++){
3251             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3252
3253             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3254             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3255         }
3256         s->y_dc_scale_table= y;
3257         s->c_dc_scale_table= c;
3258         s->intra_matrix[0] = 13;
3259         s->chroma_intra_matrix[0] = 14;
3260         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3261                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3262         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3263                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3264         s->qscale= 8;
3265     }
3266
3267     //FIXME var duplication
3268     s->current_picture_ptr->f.key_frame =
3269     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3270     s->current_picture_ptr->f.pict_type =
3271     s->current_picture.f.pict_type = s->pict_type;
3272
3273     if (s->current_picture.f.key_frame)
3274         s->picture_in_gop_number=0;
3275
         /* Write the picture header for the output format; the number of
          * bits it took is recorded in s->header_bits below. */
3276     s->last_bits= put_bits_count(&s->pb);
3277     switch(s->out_format) {
3278     case FMT_MJPEG:
3279         if (CONFIG_MJPEG_ENCODER)
3280             ff_mjpeg_encode_picture_header(s);
3281         break;
3282     case FMT_H261:
3283         if (CONFIG_H261_ENCODER)
3284             ff_h261_encode_picture_header(s, picture_number);
3285         break;
3286     case FMT_H263:
3287         if (CONFIG_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
3288             ff_wmv2_encode_picture_header(s, picture_number);
3289         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3290             msmpeg4_encode_picture_header(s, picture_number);
3291         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3292             mpeg4_encode_picture_header(s, picture_number);
3293         else if (CONFIG_RV10_ENCODER && s->codec_id == CODEC_ID_RV10)
3294             rv10_encode_picture_header(s, picture_number);
3295         else if (CONFIG_RV20_ENCODER && s->codec_id == CODEC_ID_RV20)
3296             rv20_encode_picture_header(s, picture_number);
3297         else if (CONFIG_FLV_ENCODER && s->codec_id == CODEC_ID_FLV1)
3298             ff_flv_encode_picture_header(s, picture_number);
3299         else if (CONFIG_H263_ENCODER)
3300             h263_encode_picture_header(s, picture_number);
3301         break;
3302     case FMT_MPEG1:
3303         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3304             mpeg1_encode_picture_header(s, picture_number);
3305         break;
3306     case FMT_H264:
3307         break;
3308     default:
3309         assert(0);
3310     }
3311     bits= put_bits_count(&s->pb);
3312     s->header_bits= bits - s->last_bits;
3313
         /* Update the other slice contexts, run encode_thread on all of
          * them, then merge their statistics and bitstreams into s. */
3314     for(i=1; i<context_count; i++){
3315         update_duplicate_context_after_me(s->thread_context[i], s);
3316     }
3317     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3318     for(i=1; i<context_count; i++){
3319         merge_context_after_encode(s, s->thread_context[i]);
3320     }
3321     emms_c();
3322     return 0;
3323 }
3324
3325 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3326     const int intra= s->mb_intra;
3327     int i;
3328
3329     s->dct_count[intra]++;
3330
3331     for(i=0; i<64; i++){
3332         int level= block[i];
3333
3334         if(level){
3335             if(level>0){
3336                 s->dct_error_sum[intra][i] += level;
3337                 level -= s->dct_offset[intra][i];
3338                 if(level<0) level=0;
3339             }else{
3340                 s->dct_error_sum[intra][i] -= level;
3341                 level += s->dct_offset[intra][i];
3342                 if(level>0) level=0;
3343             }
3344             block[i]= level;
3345         }
3346     }
3347 }
3348
/**
 * Forward-transform and quantize one block, choosing the levels with a
 * rate-distortion search over run/level codes.
 *
 * The block is transformed with s->dsp.fdct (and denoised when
 * s->dct_error_sum is set).  For each coefficient up to the last one that
 * quantizes to non-zero, up to two candidate levels are considered; a
 * dynamic program over scan positions picks the run/level sequence with
 * the lowest "distortion + bits * lambda" score, where lambda is derived
 * from s->lambda2.  The resulting score is stored in s->coded_score[n].
 *
 * @param s        encoder context
 * @param block    input samples; overwritten with the quantized levels
 * @param n        block index; for intra blocks n < 4 selects the luma DC
 *                 scale and intra matrix, otherwise the chroma ones
 * @param qscale   quantiser scale
 * @param overflow set to non-zero when a quantized level may exceed
 *                 s->max_qcoeff
 * @return scan index of the last non-zero coefficient; a value below the
 *         first coded index (0 for intra, -1 for inter) means no AC
 *         coefficient is coded
 */
3349 static int dct_quantize_trellis_c(MpegEncContext *s,
3350                                   DCTELEM *block, int n,
3351                                   int qscale, int *overflow){
3352     const int *qmat;
3353     const uint8_t *scantable= s->intra_scantable.scantable;
3354     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3355     int max=0;
3356     unsigned int threshold1, threshold2;
3357     int bias=0;
3358     int run_tab[65];
3359     int level_tab[65];
3360     int score_tab[65];
3361     int survivor[65];
3362     int survivor_count;
3363     int last_run=0;
3364     int last_level=0;
3365     int last_score= 0;
3366     int last_i;
3367     int coeff[2][64];
3368     int coeff_count[64];
3369     int qmul, qadd, start_i, last_non_zero, i, dc;
3370     const int esc_length= s->ac_esc_length;
3371     uint8_t * length;
3372     uint8_t * last_length;
3373     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3374
3375     s->dsp.fdct (block);
3376
3377     if(s->dct_error_sum)
3378         s->denoise_dct(s, block);
         /* reconstruction constants used for the H.263-style distortion below */
3379     qmul= qscale*16;
3380     qadd= ((qscale-1)|1)*8;
3381
3382     if (s->mb_intra) {
3383         int q;
3384         if (!s->h263_aic) {
3385             if (n < 4)
3386                 q = s->y_dc_scale;
3387             else
3388                 q = s->c_dc_scale;
3389             q = q << 3;
3390         } else{
3391             /* For AIC we skip quant/dequant of INTRADC */
3392             q = 1 << 3;
3393             qadd=0;
3394         }
3395
3396         /* note: block[0] is assumed to be positive */
3397         block[0] = (block[0] + (q >> 1)) / q;
             /* the DC coefficient is done; the search covers indices 1..63 */
3398         start_i = 1;
3399         last_non_zero = 0;
3400         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3401         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3402             bias= 1<<(QMAT_SHIFT-1);
3403         length     = s->intra_ac_vlc_length;
3404         last_length= s->intra_ac_vlc_last_length;
3405     } else {
3406         start_i = 0;
3407         last_non_zero = -1;
3408         qmat = s->q_inter_matrix[qscale];
3409         length     = s->inter_ac_vlc_length;
3410         last_length= s->inter_ac_vlc_last_length;
3411     }
3412     last_i= start_i;
3413
         /* a scaled coefficient in [-threshold1, threshold1] quantizes to zero;
          * the unsigned compare below tests both bounds at once */
3414     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3415     threshold2= (threshold1<<1);
3416
         /* scan backwards for the last coefficient that quantizes to non-zero */
3417     for(i=63; i>=start_i; i--) {
3418         const int j = scantable[i];
3419         int level = block[j] * qmat[j];
3420
3421         if(((unsigned)(level+threshold1))>threshold2){
3422             last_non_zero = i;
3423             break;
3424         }
3425     }
3426
         /* Build the candidate levels: coeff[0][i] is the rounded level and
          * coeff[1][i] the one closer to zero (used only when the magnitude
          * is at least 2); coefficients below the threshold get a single
          * candidate of +1 or -1 according to their sign. */
3427     for(i=start_i; i<=last_non_zero; i++) {
3428         const int j = scantable[i];
3429         int level = block[j] * qmat[j];
3430
3431 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3432 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3433         if(((unsigned)(level+threshold1))>threshold2){
3434             if(level>0){
3435                 level= (bias + level)>>QMAT_SHIFT;
3436                 coeff[0][i]= level;
3437                 coeff[1][i]= level-1;
3438 //                coeff[2][k]= level-2;
3439             }else{
3440                 level= (bias - level)>>QMAT_SHIFT;
3441                 coeff[0][i]= -level;
3442                 coeff[1][i]= -level+1;
3443 //                coeff[2][k]= -level+2;
3444             }
3445             coeff_count[i]= FFMIN(level, 2);
3446             assert(coeff_count[i]);
3447             max |=level;
3448         }else{
3449             coeff[0][i]= (level>>31)|1;
3450             coeff_count[i]= 1;
3451         }
3452     }
3453
3454     *overflow= s->max_qcoeff < max; //overflow might have happened
3455
         /* nothing to code: clear the searched coefficients and return */
3456     if(last_non_zero < start_i){
3457         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3458         return last_non_zero;
3459     }
3460
3461     score_tab[start_i]= 0;
3462     survivor[0]= start_i;
3463     survivor_count= 1;
3464
         /* Dynamic program over scan positions: score_tab[i+1] receives the
          * best score of a coding that has a non-zero level at position i,
          * with run_tab[i+1] / level_tab[i+1] recording the run and level
          * that achieved it.  survivor[] lists the earlier positions still
          * considered as the start of a run. */
3465     for(i=start_i; i<=last_non_zero; i++){
3466         int level_index, j, zero_distortion;
3467         int dct_coeff= FFABS(block[ scantable[i] ]);
3468         int best_score=256*256*256*120;
3469
3470         if (   s->dsp.fdct == fdct_ifast
3471 #ifndef FAAN_POSTSCALE
3472             || s->dsp.fdct == ff_faandct
3473 #endif
3474            )
3475             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3476         zero_distortion= dct_coeff*dct_coeff;
3477
3478         for(level_index=0; level_index < coeff_count[i]; level_index++){
3479             int distortion;
3480             int level= coeff[level_index][i];
3481             const int alevel= FFABS(level);
3482             int unquant_coeff;
3483
3484             assert(level);
3485
3486             if(s->out_format == FMT_H263){
3487                 unquant_coeff= alevel*qmul + qadd;
3488             }else{ //MPEG1
3489                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3490                 if(s->mb_intra){
3491                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3492                         unquant_coeff =   (unquant_coeff - 1) | 1;
3493                 }else{
3494                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3495                         unquant_coeff =   (unquant_coeff - 1) | 1;
3496                 }
3497                 unquant_coeff<<= 3;
3498             }
3499
                 /* distortion relative to coding this coefficient as zero */
3500             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* levels in [-64, 63] are costed with the VLC length tables,
                  * all others with the escape length */
3501             level+=64;
3502             if((level&(~127)) == 0){
3503                 for(j=survivor_count-1; j>=0; j--){
3504                     int run= i - survivor[j];
3505                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3506                     score += score_tab[i-run];
3507
3508                     if(score < best_score){
3509                         best_score= score;
3510                         run_tab[i+1]= run;
3511                         level_tab[i+1]= level-64;
3512                     }
3513                 }
3514
                     /* H.263-style formats also track the best coding in
                      * which this coefficient is the last one */
3515                 if(s->out_format == FMT_H263){
3516                     for(j=survivor_count-1; j>=0; j--){
3517                         int run= i - survivor[j];
3518                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3519                         score += score_tab[i-run];
3520                         if(score < last_score){
3521                             last_score= score;
3522                             last_run= run;
3523                             last_level= level-64;
3524                             last_i= i+1;
3525                         }
3526                     }
3527                 }
3528             }else{
3529                 distortion += esc_length*lambda;
3530                 for(j=survivor_count-1; j>=0; j--){
3531                     int run= i - survivor[j];
3532                     int score= distortion + score_tab[i-run];
3533
3534                     if(score < best_score){
3535                         best_score= score;
3536                         run_tab[i+1]= run;
3537                         level_tab[i+1]= level-64;
3538                     }
3539                 }
3540
3541                 if(s->out_format == FMT_H263){
3542                   for(j=survivor_count-1; j>=0; j--){
3543                         int run= i - survivor[j];
3544                         int score= distortion + score_tab[i-run];
3545                         if(score < last_score){
3546                             last_score= score;
3547                             last_run= run;
3548                             last_level= level-64;
3549                             last_i= i+1;
3550                         }
3551                     }
3552                 }
3553             }
3554         }
3555
3556         score_tab[i+1]= best_score;
3557
             /* drop trailing survivors whose score is worse than the new best
              * (with a tolerance of lambda when last_non_zero > 27) */
3558         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3559         if(last_non_zero <= 27){
3560             for(; survivor_count; survivor_count--){
3561                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3562                     break;
3563             }
3564         }else{
3565             for(; survivor_count; survivor_count--){
3566                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3567                     break;
3568             }
3569         }
3570
3571         survivor[ survivor_count++ ]= i+1;
3572     }
3573
         /* Other formats choose the end position afterwards; every end
          * position except 0 is charged an extra 2*lambda. */
3574     if(s->out_format != FMT_H263){
3575         last_score= 256*256*256*120;
3576         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3577             int score= score_tab[i];
3578             if(i) score += lambda*2; //FIXME exacter?
3579
3580             if(score < last_score){
3581                 last_score= score;
3582                 last_i= i;
3583                 last_level= level_tab[i];
3584                 last_run= run_tab[i];
3585             }
3586         }
3587     }
3588
3589     s->coded_score[n] = last_score;
3590
         /* remember |block[0]| before the searched coefficients are cleared */
3591     dc= FFABS(block[0]);
3592     last_non_zero= last_i - 1;
3593     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3594
3595     if(last_non_zero < start_i)
3596         return last_non_zero;
3597
         /* Only coefficient 0 of a non-intra block remains: decide its level
          * directly, including the option of dropping it entirely. */
3598     if(last_non_zero == 0 && start_i == 0){
3599         int best_level= 0;
3600         int best_score= dc * dc;
3601
3602         for(i=0; i<coeff_count[0]; i++){
3603             int level= coeff[i][0];
3604             int alevel= FFABS(level);
3605             int unquant_coeff, score, distortion;
3606
3607             if(s->out_format == FMT_H263){
3608                     unquant_coeff= (alevel*qmul + qadd)>>3;
3609             }else{ //MPEG1
3610                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3611                     unquant_coeff =   (unquant_coeff - 1) | 1;
3612             }
3613             unquant_coeff = (unquant_coeff + 4) >> 3;
3614             unquant_coeff<<= 3 + 3;
3615
3616             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3617             level+=64;
3618             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3619             else                    score= distortion + esc_length*lambda;
3620
3621             if(score < best_score){
3622                 best_score= score;
3623                 best_level= level - 64;
3624             }
3625         }
3626         block[0]= best_level;
3627         s->coded_score[n] = best_score - dc*dc;
3628         if(best_level == 0) return -1;
3629         else                return last_non_zero;
3630     }
3631
         /* Write the chosen levels back in permutated scan order, walking
          * backwards through run_tab / level_tab from the last coefficient. */
3632     i= last_i;
3633     assert(last_level);
3634
3635     block[ perm_scantable[last_non_zero] ]= last_level;
3636     i -= last_run + 1;
3637
3638     for(; i>start_i; i -= run_tab[i] + 1){
3639         block[ perm_scantable[i-1] ]= level_tab[i];
3640     }
3641
3642     return last_non_zero;
3643 }
3644
3645 //#define REFINE_STATS 1
/* Table of scaled 8x8 cosine basis functions: one 64-entry row per
 * (permuted) coefficient index.  It is filled by build_basis(), which
 * dct_quantize_refine() calls when basis[0][0] is still zero. */
3646 static int16_t basis[64][64];
3647
3648 static void build_basis(uint8_t *perm){
3649     int i, j, x, y;
3650     emms_c();
3651     for(i=0; i<8; i++){
3652         for(j=0; j<8; j++){
3653             for(y=0; y<8; y++){
3654                 for(x=0; x<8; x++){
3655                     double s= 0.25*(1<<BASIS_SHIFT);
3656                     int index= 8*i + j;
3657                     int perm_index= perm[index];
3658                     if(i==0) s*= sqrt(0.5);
3659                     if(j==0) s*= sqrt(0.5);
3660                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3661                 }
3662             }
3663         }
3664     }
3665 }
3666
3667 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3668                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3669                         int n, int qscale){
3670     int16_t rem[64];
3671     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3672     const uint8_t *scantable= s->intra_scantable.scantable;
3673     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3674 //    unsigned int threshold1, threshold2;
3675 //    int bias=0;
3676     int run_tab[65];
3677     int prev_run=0;
3678     int prev_level=0;
3679     int qmul, qadd, start_i, last_non_zero, i, dc;
3680     uint8_t * length;
3681     uint8_t * last_length;
3682     int lambda;
3683     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3684 #ifdef REFINE_STATS
3685 static int count=0;
3686 static int after_last=0;
3687 static int to_zero=0;
3688 static int from_zero=0;
3689 static int raise=0;
3690 static int lower=0;
3691 static int messed_sign=0;
3692 #endif
3693
3694     if(basis[0][0] == 0)
3695         build_basis(s->dsp.idct_permutation);
3696
3697     qmul= qscale*2;
3698     qadd= (qscale-1)|1;
3699     if (s->mb_intra) {
3700         if (!s->h263_aic) {
3701             if (n < 4)
3702                 q = s->y_dc_scale;
3703             else
3704                 q = s->c_dc_scale;
3705         } else{
3706             /* For AIC we skip quant/dequant of INTRADC */
3707             q = 1;
3708             qadd=0;
3709         }
3710         q <<= RECON_SHIFT-3;
3711         /* note: block[0] is assumed to be positive */
3712         dc= block[0]*q;
3713 //        block[0] = (block[0] + (q >> 1)) / q;
3714         start_i = 1;
3715 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3716 //            bias= 1<<(QMAT_SHIFT-1);
3717         length     = s->intra_ac_vlc_length;
3718         last_length= s->intra_ac_vlc_last_length;
3719     } else {
3720         dc= 0;
3721         start_i = 0;
3722         length     = s->inter_ac_vlc_length;
3723         last_length= s->inter_ac_vlc_last_length;
3724     }
3725     last_non_zero = s->block_last_index[n];
3726
3727 #ifdef REFINE_STATS
3728 {START_TIMER
3729 #endif
3730     dc += (1<<(RECON_SHIFT-1));
3731     for(i=0; i<64; i++){
3732         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3733     }
3734 #ifdef REFINE_STATS
3735 STOP_TIMER("memset rem[]")}
3736 #endif
3737     sum=0;
3738     for(i=0; i<64; i++){
3739         int one= 36;
3740         int qns=4;
3741         int w;
3742
3743         w= FFABS(weight[i]) + qns*one;
3744         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3745
3746         weight[i] = w;
3747 //        w=weight[i] = (63*qns + (w/2)) / w;
3748
3749         assert(w>0);
3750         assert(w<(1<<6));
3751         sum += w*w;
3752     }
3753     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3754 #ifdef REFINE_STATS
3755 {START_TIMER
3756 #endif
3757     run=0;
3758     rle_index=0;
3759     for(i=start_i; i<=last_non_zero; i++){
3760         int j= perm_scantable[i];
3761         const int level= block[j];
3762         int coeff;
3763
3764         if(level){
3765             if(level<0) coeff= qmul*level - qadd;
3766             else        coeff= qmul*level + qadd;
3767             run_tab[rle_index++]=run;
3768             run=0;
3769
3770             s->dsp.add_8x8basis(rem, basis[j], coeff);
3771         }else{
3772             run++;
3773         }
3774     }
3775 #ifdef REFINE_STATS
3776 if(last_non_zero>0){
3777 STOP_TIMER("init rem[]")
3778 }
3779 }
3780
3781 {START_TIMER
3782 #endif
3783     for(;;){
3784         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3785         int best_coeff=0;
3786         int best_change=0;
3787         int run2, best_unquant_change=0, analyze_gradient;
3788 #ifdef REFINE_STATS
3789 {START_TIMER
3790 #endif
3791         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
3792
3793         if(analyze_gradient){
3794 #ifdef REFINE_STATS
3795 {START_TIMER
3796 #endif
3797             for(i=0; i<64; i++){
3798                 int w= weight[i];
3799
3800                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3801             }
3802 #ifdef REFINE_STATS
3803 STOP_TIMER("rem*w*w")}
3804 {START_TIMER
3805 #endif
3806             s->dsp.fdct(d1);
3807 #ifdef REFINE_STATS
3808 STOP_TIMER("dct")}
3809 #endif
3810         }
3811
3812         if(start_i){
3813             const int level= block[0];
3814             int change, old_coeff;
3815
3816             assert(s->mb_intra);
3817
3818             old_coeff= q*level;
3819
3820             for(change=-1; change<=1; change+=2){
3821                 int new_level= level + change;
3822                 int score, new_coeff;
3823
3824                 new_coeff= q*new_level;
3825                 if(new_coeff >= 2048 || new_coeff < 0)
3826                     continue;
3827
3828                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3829                 if(score<best_score){
3830                     best_score= score;
3831                     best_coeff= 0;
3832                     best_change= change;
3833                     best_unquant_change= new_coeff - old_coeff;
3834                 }
3835             }
3836         }
3837
3838         run=0;
3839         rle_index=0;
3840         run2= run_tab[rle_index++];
3841         prev_level=0;
3842         prev_run=0;
3843
3844         for(i=start_i; i<64; i++){
3845             int j= perm_scantable[i];
3846             const int level= block[j];
3847             int change, old_coeff;
3848
3849             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3850                 break;
3851
3852             if(level){
3853                 if(level<0) old_coeff= qmul*level - qadd;
3854                 else        old_coeff= qmul*level + qadd;
3855                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3856             }else{
3857                 old_coeff=0;
3858                 run2--;
3859                 assert(run2>=0 || i >= last_non_zero );
3860             }
3861
3862             for(change=-1; change<=1; change+=2){
3863                 int new_level= level + change;
3864                 int score, new_coeff, unquant_change;
3865
3866                 score=0;
3867                 if(s->avctx->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3868                    continue;
3869
3870                 if(new_level){
3871                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3872                     else            new_coeff= qmul*new_level + qadd;
3873                     if(new_coeff >= 2048 || new_coeff <= -2048)
3874                         continue;
3875                     //FIXME check for overflow
3876
3877                     if(level){
3878                         if(level < 63 && level > -63){
3879                             if(i < last_non_zero)
3880                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3881                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3882                             else
3883                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3884                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3885                         }
3886                     }else{
3887                         assert(FFABS(new_level)==1);
3888
3889                         if(analyze_gradient){
3890                             int g= d1[ scantable[i] ];
3891                             if(g && (g^new_level) >= 0)
3892                                 continue;
3893                         }
3894
3895                         if(i < last_non_zero){
3896                             int next_i= i + run2 + 1;
3897                             int next_level= block[ perm_scantable[next_i] ] + 64;
3898
3899                             if(next_level&(~127))
3900                                 next_level= 0;
3901
3902                             if(next_i < last_non_zero)
3903                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3904                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3905                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3906                             else
3907                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3908                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3909                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3910                         }else{
3911                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3912                             if(prev_level){
3913                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3914                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3915                             }
3916                         }
3917                     }
3918                 }else{
3919                     new_coeff=0;
3920                     assert(FFABS(level)==1);
3921
3922                     if(i < last_non_zero){
3923                         int next_i= i + run2 + 1;
3924                         int next_level= block[ perm_scantable[next_i] ] + 64;
3925
3926                         if(next_level&(~127))
3927                             next_level= 0;
3928
3929                         if(next_i < last_non_zero)
3930                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3931                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3932                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3933                         else
3934                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3935                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3936                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3937                     }else{
3938                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3939                         if(prev_level){
3940                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3941                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3942                         }
3943                     }
3944                 }
3945
3946                 score *= lambda;
3947
3948                 unquant_change= new_coeff - old_coeff;
3949                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3950
3951                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3952                 if(score<best_score){
3953                     best_score= score;
3954                     best_coeff= i;
3955                     best_change= change;
3956                     best_unquant_change= unquant_change;
3957                 }
3958             }
3959             if(level){
3960                 prev_level= level + 64;
3961                 if(prev_level&(~127))
3962                     prev_level= 0;
3963                 prev_run= run;
3964                 run=0;
3965             }else{
3966                 run++;
3967             }
3968         }
3969 #ifdef REFINE_STATS
3970 STOP_TIMER("iterative step")}
3971 #endif
3972
3973         if(best_change){
3974             int j= perm_scantable[ best_coeff ];
3975
3976             block[j] += best_change;
3977
3978             if(best_coeff > last_non_zero){
3979                 last_non_zero= best_coeff;
3980                 assert(block[j]);
3981 #ifdef REFINE_STATS
3982 after_last++;
3983 #endif
3984             }else{
3985 #ifdef REFINE_STATS
3986 if(block[j]){
3987     if(block[j] - best_change){
3988         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3989             raise++;
3990         }else{
3991             lower++;
3992         }
3993     }else{
3994         from_zero++;
3995     }
3996 }else{
3997     to_zero++;
3998 }
3999 #endif
4000                 for(; last_non_zero>=start_i; last_non_zero--){
4001                     if(block[perm_scantable[last_non_zero]])
4002                         break;
4003                 }
4004             }
4005 #ifdef REFINE_STATS
4006 count++;
4007 if(256*256*256*64 % count == 0){
4008     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4009 }
4010 #endif
4011             run=0;
4012             rle_index=0;
4013             for(i=start_i; i<=last_non_zero; i++){
4014                 int j= perm_scantable[i];
4015                 const int level= block[j];
4016
4017                  if(level){
4018                      run_tab[rle_index++]=run;
4019                      run=0;
4020                  }else{
4021                      run++;
4022                  }
4023             }
4024
4025             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4026         }else{
4027             break;
4028         }
4029     }
4030 #ifdef REFINE_STATS
4031 if(last_non_zero>0){
4032 STOP_TIMER("iterative search")
4033 }
4034 }
4035 #endif
4036
4037     return last_non_zero;
4038 }
4039
4040 int dct_quantize_c(MpegEncContext *s,
4041                         DCTELEM *block, int n,
4042                         int qscale, int *overflow)
4043 {
4044     int i, j, level, last_non_zero, q, start_i;
4045     const int *qmat;
4046     const uint8_t *scantable= s->intra_scantable.scantable;
4047     int bias;
4048     int max=0;
4049     unsigned int threshold1, threshold2;
4050
4051     s->dsp.fdct (block);
4052
4053     if(s->dct_error_sum)
4054         s->denoise_dct(s, block);
4055
4056     if (s->mb_intra) {
4057         if (!s->h263_aic) {
4058             if (n < 4)
4059                 q = s->y_dc_scale;
4060             else
4061                 q = s->c_dc_scale;
4062             q = q << 3;
4063         } else
4064             /* For AIC we skip quant/dequant of INTRADC */
4065             q = 1 << 3;
4066
4067         /* note: block[0] is assumed to be positive */
4068         block[0] = (block[0] + (q >> 1)) / q;
4069         start_i = 1;
4070         last_non_zero = 0;
4071         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4072         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4073     } else {
4074         start_i = 0;
4075         last_non_zero = -1;
4076         qmat = s->q_inter_matrix[qscale];
4077         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4078     }
4079     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4080     threshold2= (threshold1<<1);
4081     for(i=63;i>=start_i;i--) {
4082         j = scantable[i];
4083         level = block[j] * qmat[j];
4084
4085         if(((unsigned)(level+threshold1))>threshold2){
4086             last_non_zero = i;
4087             break;
4088         }else{
4089             block[j]=0;
4090         }
4091     }
4092     for(i=start_i; i<=last_non_zero; i++) {
4093         j = scantable[i];
4094         level = block[j] * qmat[j];
4095
4096 //        if(   bias+level >= (1<<QMAT_SHIFT)
4097 //           || bias-level >= (1<<QMAT_SHIFT)){
4098         if(((unsigned)(level+threshold1))>threshold2){
4099             if(level>0){
4100                 level= (bias + level)>>QMAT_SHIFT;
4101                 block[j]= level;
4102             }else{
4103                 level= (bias - level)>>QMAT_SHIFT;
4104                 block[j]= -level;
4105             }
4106             max |=level;
4107         }else{
4108             block[j]=0;
4109         }
4110     }
4111     *overflow= s->max_qcoeff < max; //overflow might have happened
4112
4113     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4114     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4115         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4116
4117     return last_non_zero;
4118 }
4119
/* Byte offset of field x inside MpegEncContext, for use in AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags: video parameter + encoding parameter.
 * The expansion is parenthesized so that it remains a single operand when the
 * macro is combined with other operators. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4122 static const AVOption h263_options[] = {
4123     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4124     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4125     { NULL },
4126 };
4127
4128 static const AVClass h263_class = {
4129     .class_name = "H.263 encoder",
4130     .item_name  = av_default_item_name,
4131     .option     = h263_options,
4132     .version    = LIBAVUTIL_VERSION_INT,
4133 };
4134
4135 AVCodec ff_h263_encoder = {
4136     .name           = "h263",
4137     .type           = AVMEDIA_TYPE_VIDEO,
4138     .id             = CODEC_ID_H263,
4139     .priv_data_size = sizeof(MpegEncContext),
4140     .init           = MPV_encode_init,
4141     .encode         = MPV_encode_picture,
4142     .close          = MPV_encode_end,
4143     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4144     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4145     .priv_class     = &h263_class,
4146 };
4147
4148 static const AVOption h263p_options[] = {
4149     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4150     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4151     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4152     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4153     { NULL },
4154 };
4155 static const AVClass h263p_class = {
4156     .class_name = "H.263p encoder",
4157     .item_name  = av_default_item_name,
4158     .option     = h263p_options,
4159     .version    = LIBAVUTIL_VERSION_INT,
4160 };
4161
4162 AVCodec ff_h263p_encoder = {
4163     .name           = "h263p",
4164     .type           = AVMEDIA_TYPE_VIDEO,
4165     .id             = CODEC_ID_H263P,
4166     .priv_data_size = sizeof(MpegEncContext),
4167     .init           = MPV_encode_init,
4168     .encode         = MPV_encode_picture,
4169     .close          = MPV_encode_end,
4170     .capabilities = CODEC_CAP_SLICE_THREADS,
4171     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4172     .long_name= NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4173     .priv_class     = &h263p_class,
4174 };
4175
4176 AVCodec ff_msmpeg4v2_encoder = {
4177     .name           = "msmpeg4v2",
4178     .type           = AVMEDIA_TYPE_VIDEO,
4179     .id             = CODEC_ID_MSMPEG4V2,
4180     .priv_data_size = sizeof(MpegEncContext),
4181     .init           = MPV_encode_init,
4182     .encode         = MPV_encode_picture,
4183     .close          = MPV_encode_end,
4184     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4185     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4186 };
4187
4188 AVCodec ff_msmpeg4v3_encoder = {
4189     .name           = "msmpeg4",
4190     .type           = AVMEDIA_TYPE_VIDEO,
4191     .id             = CODEC_ID_MSMPEG4V3,
4192     .priv_data_size = sizeof(MpegEncContext),
4193     .init           = MPV_encode_init,
4194     .encode         = MPV_encode_picture,
4195     .close          = MPV_encode_end,
4196     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4197     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4198 };
4199
4200 AVCodec ff_wmv1_encoder = {
4201     .name           = "wmv1",
4202     .type           = AVMEDIA_TYPE_VIDEO,
4203     .id             = CODEC_ID_WMV1,
4204     .priv_data_size = sizeof(MpegEncContext),
4205     .init           = MPV_encode_init,
4206     .encode         = MPV_encode_picture,
4207     .close          = MPV_encode_end,
4208     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4209     .long_name= NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4210 };