]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge remote-tracking branch 'qatar/master'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "mpegvideo_common.h"
37 #include "h263.h"
38 #include "mjpegenc.h"
39 #include "msmpeg4.h"
40 #include "faandct.h"
41 #include "thread.h"
42 #include "aandcttab.h"
43 #include "flv.h"
44 #include "mpeg4video.h"
45 #include "internal.h"
46 #include "bytestream.h"
47 #include <limits.h>
48 #include "sp5x.h"
49
50 //#undef NDEBUG
51 //#include <assert.h>
52
53 static int encode_picture(MpegEncContext *s, int picture_number);
54 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
55 static int sse_mb(MpegEncContext *s);
56 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
57 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
58
59 /* enable all paranoid tests for rounding, overflows, etc... */
60 //#define PARANOID
61
62 //#define DEBUG
63
64 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
65 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
66
67 const AVOption ff_mpv_generic_options[] = {
68     FF_MPV_COMMON_OPTS
69     { NULL },
70 };
71
72 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
73                        uint16_t (*qmat16)[2][64],
74                        const uint16_t *quant_matrix,
75                        int bias, int qmin, int qmax, int intra)
76 {
77     int qscale;
78     int shift = 0;
79
80     for (qscale = qmin; qscale <= qmax; qscale++) {
81         int i;
82         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
83             dsp->fdct == ff_jpeg_fdct_islow_10 ||
84             dsp->fdct == ff_faandct) {
85             for (i = 0; i < 64; i++) {
86                 const int j = dsp->idct_permutation[i];
87                 /* 16 <= qscale * quant_matrix[i] <= 7905
88                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
89                  *             19952 <=              x  <= 249205026
90                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
91                  *           3444240 >= (1 << 36) / (x) >= 275 */
92
93                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
94                                         (qscale * quant_matrix[j]));
95             }
96         } else if (dsp->fdct == ff_fdct_ifast) {
97             for (i = 0; i < 64; i++) {
98                 const int j = dsp->idct_permutation[i];
99                 /* 16 <= qscale * quant_matrix[i] <= 7905
100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
101                  *             19952 <=              x  <= 249205026
102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
103                  *           3444240 >= (1 << 36) / (x) >= 275 */
104
105                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
106                                         (ff_aanscales[i] * qscale * quant_matrix[j]));
107             }
108         } else {
109             for (i = 0; i < 64; i++) {
110                 const int j = dsp->idct_permutation[i];
111                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
112                  * Assume x = qscale * quant_matrix[i]
113                  * So             16 <=              x  <= 7905
114                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
115                  * so          32768 >= (1 << 19) / (x) >= 67 */
116                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
117                                         (qscale * quant_matrix[j]));
118                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
119                 //                    (qscale * quant_matrix[i]);
120                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
121                                        (qscale * quant_matrix[j]);
122
123                 if (qmat16[qscale][0][i] == 0 ||
124                     qmat16[qscale][0][i] == 128 * 256)
125                     qmat16[qscale][0][i] = 128 * 256 - 1;
126                 qmat16[qscale][1][i] =
127                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
128                                 qmat16[qscale][0][i]);
129             }
130         }
131
132         for (i = intra; i < 64; i++) {
133             int64_t max = 8191;
134             if (dsp->fdct == ff_fdct_ifast) {
135                 max = (8191LL * ff_aanscales[i]) >> 14;
136             }
137             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
138                 shift++;
139             }
140         }
141     }
142     if (shift) {
143         av_log(NULL, AV_LOG_INFO,
144                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
145                QMAT_SHIFT - shift);
146     }
147 }
148
149 static inline void update_qscale(MpegEncContext *s)
150 {
151     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
152                 (FF_LAMBDA_SHIFT + 7);
153     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
154
155     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
156                  FF_LAMBDA_SHIFT;
157 }
158
159 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
160 {
161     int i;
162
163     if (matrix) {
164         put_bits(pb, 1, 1);
165         for (i = 0; i < 64; i++) {
166             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
167         }
168     } else
169         put_bits(pb, 1, 0);
170 }
171
172 /**
173  * init s->current_picture.qscale_table from s->lambda_table
174  */
175 void ff_init_qscale_tab(MpegEncContext *s)
176 {
177     int8_t * const qscale_table = s->current_picture.f.qscale_table;
178     int i;
179
180     for (i = 0; i < s->mb_num; i++) {
181         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
182         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
183         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
184                                                   s->avctx->qmax);
185     }
186 }
187
188 static void copy_picture_attributes(MpegEncContext *s,
189                                     AVFrame *dst,
190                                     AVFrame *src)
191 {
192     int i;
193
194     dst->pict_type              = src->pict_type;
195     dst->quality                = src->quality;
196     dst->coded_picture_number   = src->coded_picture_number;
197     dst->display_picture_number = src->display_picture_number;
198     //dst->reference              = src->reference;
199     dst->pts                    = src->pts;
200     dst->interlaced_frame       = src->interlaced_frame;
201     dst->top_field_first        = src->top_field_first;
202
203     if (s->avctx->me_threshold) {
204         if (!src->motion_val[0])
205             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
206         if (!src->mb_type)
207             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
208         if (!src->ref_index[0])
209             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
210         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
211             av_log(s->avctx, AV_LOG_ERROR,
212                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
213                    src->motion_subsample_log2, dst->motion_subsample_log2);
214
215         memcpy(dst->mb_type, src->mb_type,
216                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
217
218         for (i = 0; i < 2; i++) {
219             int stride = ((16 * s->mb_width ) >>
220                           src->motion_subsample_log2) + 1;
221             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
222
223             if (src->motion_val[i] &&
224                 src->motion_val[i] != dst->motion_val[i]) {
225                 memcpy(dst->motion_val[i], src->motion_val[i],
226                        2 * stride * height * sizeof(int16_t));
227             }
228             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
229                 memcpy(dst->ref_index[i], src->ref_index[i],
230                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
231             }
232         }
233     }
234 }
235
236 static void update_duplicate_context_after_me(MpegEncContext *dst,
237                                               MpegEncContext *src)
238 {
239 #define COPY(a) dst->a= src->a
240     COPY(pict_type);
241     COPY(current_picture);
242     COPY(f_code);
243     COPY(b_code);
244     COPY(qscale);
245     COPY(lambda);
246     COPY(lambda2);
247     COPY(picture_in_gop_number);
248     COPY(gop_picture_number);
249     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
250     COPY(progressive_frame);    // FIXME don't set in encode_header
251     COPY(partitioned_frame);    // FIXME don't set in encode_header
252 #undef COPY
253 }
254
255 /**
256  * Set the given MpegEncContext to defaults for encoding.
257  * the changed fields will not depend upon the prior state of the MpegEncContext.
258  */
259 static void MPV_encode_defaults(MpegEncContext *s)
260 {
261     int i;
262     ff_MPV_common_defaults(s);
263
264     for (i = -16; i < 16; i++) {
265         default_fcode_tab[i + MAX_MV] = 1;
266     }
267     s->me.mv_penalty = default_mv_penalty;
268     s->fcode_tab     = default_fcode_tab;
269 }
270
271 /* init video encoder */
272 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
273 {
274     MpegEncContext *s = avctx->priv_data;
275     int i;
276     int chroma_h_shift, chroma_v_shift;
277
278     MPV_encode_defaults(s);
279
280     switch (avctx->codec_id) {
281     case CODEC_ID_MPEG2VIDEO:
282         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
283             avctx->pix_fmt != PIX_FMT_YUV422P) {
284             av_log(avctx, AV_LOG_ERROR,
285                    "only YUV420 and YUV422 are supported\n");
286             return -1;
287         }
288         break;
289     case CODEC_ID_LJPEG:
290         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
291             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
292             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
293             avctx->pix_fmt != PIX_FMT_BGR0     &&
294             avctx->pix_fmt != PIX_FMT_BGRA     &&
295             avctx->pix_fmt != PIX_FMT_BGR24    &&
296             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
297               avctx->pix_fmt != PIX_FMT_YUV422P &&
298               avctx->pix_fmt != PIX_FMT_YUV444P) ||
299              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
300             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
301             return -1;
302         }
303         break;
304     case CODEC_ID_MJPEG:
305     case CODEC_ID_AMV:
306         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
307             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
308             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
309               avctx->pix_fmt != PIX_FMT_YUV422P) ||
310              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
311             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
312             return -1;
313         }
314         break;
315     default:
316         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
317             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
318             return -1;
319         }
320     }
321
322     switch (avctx->pix_fmt) {
323     case PIX_FMT_YUVJ422P:
324     case PIX_FMT_YUV422P:
325         s->chroma_format = CHROMA_422;
326         break;
327     case PIX_FMT_YUVJ420P:
328     case PIX_FMT_YUV420P:
329     default:
330         s->chroma_format = CHROMA_420;
331         break;
332     }
333
334     s->bit_rate = avctx->bit_rate;
335     s->width    = avctx->width;
336     s->height   = avctx->height;
337     if (avctx->gop_size > 600 &&
338         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
339         av_log(avctx, AV_LOG_WARNING,
340                "keyframe interval too large!, reducing it from %d to %d\n",
341                avctx->gop_size, 600);
342         avctx->gop_size = 600;
343     }
344     s->gop_size     = avctx->gop_size;
345     s->avctx        = avctx;
346     s->flags        = avctx->flags;
347     s->flags2       = avctx->flags2;
348     s->max_b_frames = avctx->max_b_frames;
349     s->codec_id     = avctx->codec->id;
350 #if FF_API_MPV_GLOBAL_OPTS
351     if (avctx->luma_elim_threshold)
352         s->luma_elim_threshold   = avctx->luma_elim_threshold;
353     if (avctx->chroma_elim_threshold)
354         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
355 #endif
356     s->strict_std_compliance = avctx->strict_std_compliance;
357     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
358     s->mpeg_quant         = avctx->mpeg_quant;
359     s->rtp_mode           = !!avctx->rtp_payload_size;
360     s->intra_dc_precision = avctx->intra_dc_precision;
361     s->user_specified_pts = AV_NOPTS_VALUE;
362
363     if (s->gop_size <= 1) {
364         s->intra_only = 1;
365         s->gop_size   = 12;
366     } else {
367         s->intra_only = 0;
368     }
369
370     s->me_method = avctx->me_method;
371
372     /* Fixed QSCALE */
373     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
374
375 #if FF_API_MPV_GLOBAL_OPTS
376     if (s->flags & CODEC_FLAG_QP_RD)
377         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
378 #endif
379
380     s->adaptive_quant = (s->avctx->lumi_masking ||
381                          s->avctx->dark_masking ||
382                          s->avctx->temporal_cplx_masking ||
383                          s->avctx->spatial_cplx_masking  ||
384                          s->avctx->p_masking      ||
385                          s->avctx->border_masking ||
386                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
387                         !s->fixed_qscale;
388
389     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
390
391     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
392         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
393         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
394             return -1;
395     }
396
397     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
398         av_log(avctx, AV_LOG_INFO,
399                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
400     }
401
402     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
403         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
404         return -1;
405     }
406
407     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
408         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
409         return -1;
410     }
411
412     if (avctx->rc_max_rate &&
413         avctx->rc_max_rate == avctx->bit_rate &&
414         avctx->rc_max_rate != avctx->rc_min_rate) {
415         av_log(avctx, AV_LOG_INFO,
416                "impossible bitrate constraints, this will fail\n");
417     }
418
419     if (avctx->rc_buffer_size &&
420         avctx->bit_rate * (int64_t)avctx->time_base.num >
421             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
422         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
423         return -1;
424     }
425
426     if (!s->fixed_qscale &&
427         avctx->bit_rate * av_q2d(avctx->time_base) >
428             avctx->bit_rate_tolerance) {
429         av_log(avctx, AV_LOG_ERROR,
430                "bitrate tolerance too small for bitrate\n");
431         return -1;
432     }
433
434     if (s->avctx->rc_max_rate &&
435         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
436         (s->codec_id == CODEC_ID_MPEG1VIDEO ||
437          s->codec_id == CODEC_ID_MPEG2VIDEO) &&
438         90000LL * (avctx->rc_buffer_size - 1) >
439             s->avctx->rc_max_rate * 0xFFFFLL) {
440         av_log(avctx, AV_LOG_INFO,
441                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
442                "specified vbv buffer is too large for the given bitrate!\n");
443     }
444
445     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != CODEC_ID_MPEG4 &&
446         s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P &&
447         s->codec_id != CODEC_ID_FLV1) {
448         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
449         return -1;
450     }
451
452     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
453         av_log(avctx, AV_LOG_ERROR,
454                "OBMC is only supported with simple mb decision\n");
455         return -1;
456     }
457
458     if (s->quarter_sample && s->codec_id != CODEC_ID_MPEG4) {
459         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
460         return -1;
461     }
462
463     if (s->max_b_frames                    &&
464         s->codec_id != CODEC_ID_MPEG4      &&
465         s->codec_id != CODEC_ID_MPEG1VIDEO &&
466         s->codec_id != CODEC_ID_MPEG2VIDEO) {
467         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
468         return -1;
469     }
470
471     if ((s->codec_id == CODEC_ID_MPEG4 ||
472          s->codec_id == CODEC_ID_H263  ||
473          s->codec_id == CODEC_ID_H263P) &&
474         (avctx->sample_aspect_ratio.num > 255 ||
475          avctx->sample_aspect_ratio.den > 255)) {
476         av_log(avctx, AV_LOG_WARNING,
477                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
478                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
479         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
480                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
481     }
482
483     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
484         s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO) {
485         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
486         return -1;
487     }
488
489     // FIXME mpeg2 uses that too
490     if (s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4) {
491         av_log(avctx, AV_LOG_ERROR,
492                "mpeg2 style quantization not supported by codec\n");
493         return -1;
494     }
495
496 #if FF_API_MPV_GLOBAL_OPTS
497     if (s->flags & CODEC_FLAG_CBP_RD)
498         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
499 #endif
500
501     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
502         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
503         return -1;
504     }
505
506     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
507         s->avctx->mb_decision != FF_MB_DECISION_RD) {
508         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
509         return -1;
510     }
511
512     if (s->avctx->scenechange_threshold < 1000000000 &&
513         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
514         av_log(avctx, AV_LOG_ERROR,
515                "closed gop with scene change detection are not supported yet, "
516                "set threshold to 1000000000\n");
517         return -1;
518     }
519
520     if (s->flags & CODEC_FLAG_LOW_DELAY) {
521         if (s->codec_id != CODEC_ID_MPEG2VIDEO) {
522             av_log(avctx, AV_LOG_ERROR,
523                   "low delay forcing is only available for mpeg2\n");
524             return -1;
525         }
526         if (s->max_b_frames != 0) {
527             av_log(avctx, AV_LOG_ERROR,
528                    "b frames cannot be used with low delay\n");
529             return -1;
530         }
531     }
532
533     if (s->q_scale_type == 1) {
534         if (avctx->qmax > 12) {
535             av_log(avctx, AV_LOG_ERROR,
536                    "non linear quant only supports qmax <= 12 currently\n");
537             return -1;
538         }
539     }
540
541     if (s->avctx->thread_count > 1         &&
542         s->codec_id != CODEC_ID_MPEG4      &&
543         s->codec_id != CODEC_ID_MPEG1VIDEO &&
544         s->codec_id != CODEC_ID_MPEG2VIDEO &&
545         (s->codec_id != CODEC_ID_H263P)) {
546         av_log(avctx, AV_LOG_ERROR,
547                "multi threaded encoding not supported by codec\n");
548         return -1;
549     }
550
551     if (s->avctx->thread_count < 1) {
552         av_log(avctx, AV_LOG_ERROR,
553                "automatic thread number detection not supported by codec, "
554                "patch welcome\n");
555         return -1;
556     }
557
558     if (s->avctx->thread_count > 1)
559         s->rtp_mode = 1;
560
561     if (!avctx->time_base.den || !avctx->time_base.num) {
562         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
563         return -1;
564     }
565
566     i = (INT_MAX / 2 + 128) >> 8;
567     if (avctx->me_threshold >= i) {
568         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
569                i - 1);
570         return -1;
571     }
572     if (avctx->mb_threshold >= i) {
573         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
574                i - 1);
575         return -1;
576     }
577
578     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
579         av_log(avctx, AV_LOG_INFO,
580                "notice: b_frame_strategy only affects the first pass\n");
581         avctx->b_frame_strategy = 0;
582     }
583
584     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
585     if (i > 1) {
586         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
587         avctx->time_base.den /= i;
588         avctx->time_base.num /= i;
589         //return -1;
590     }
591
592     if (s->mpeg_quant || s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || s->codec_id == CODEC_ID_MJPEG || s->codec_id==CODEC_ID_AMV) {
593         // (a + x * 3 / 8) / x
594         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
595         s->inter_quant_bias = 0;
596     } else {
597         s->intra_quant_bias = 0;
598         // (a - x / 4) / x
599         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
600     }
601
602     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
603         s->intra_quant_bias = avctx->intra_quant_bias;
604     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
605         s->inter_quant_bias = avctx->inter_quant_bias;
606
607     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
608
609     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
610                                   &chroma_v_shift);
611
612     if (avctx->codec_id == CODEC_ID_MPEG4 &&
613         s->avctx->time_base.den > (1 << 16) - 1) {
614         av_log(avctx, AV_LOG_ERROR,
615                "timebase %d/%d not supported by MPEG 4 standard, "
616                "the maximum admitted value for the timebase denominator "
617                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
618                (1 << 16) - 1);
619         return -1;
620     }
621     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
622
623 #if FF_API_MPV_GLOBAL_OPTS
624     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
625         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
626     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
627         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
628     if (avctx->quantizer_noise_shaping)
629         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
630 #endif
631
632     switch (avctx->codec->id) {
633     case CODEC_ID_MPEG1VIDEO:
634         s->out_format = FMT_MPEG1;
635         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
636         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
637         break;
638     case CODEC_ID_MPEG2VIDEO:
639         s->out_format = FMT_MPEG1;
640         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
641         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
642         s->rtp_mode   = 1;
643         break;
644     case CODEC_ID_LJPEG:
645     case CODEC_ID_MJPEG:
646     case CODEC_ID_AMV:
647         s->out_format = FMT_MJPEG;
648         s->intra_only = 1; /* force intra only for jpeg */
649         if (avctx->codec->id == CODEC_ID_LJPEG &&
650             (avctx->pix_fmt == PIX_FMT_BGR0
651              || s->avctx->pix_fmt == PIX_FMT_BGRA
652              || s->avctx->pix_fmt == PIX_FMT_BGR24)) {
653             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
654             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
655             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
656         } else {
657             s->mjpeg_vsample[0] = 2;
658             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
659             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
660             s->mjpeg_hsample[0] = 2;
661             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
662             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
663         }
664         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
665             ff_mjpeg_encode_init(s) < 0)
666             return -1;
667         avctx->delay = 0;
668         s->low_delay = 1;
669         break;
670     case CODEC_ID_H261:
671         if (!CONFIG_H261_ENCODER)
672             return -1;
673         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
674             av_log(avctx, AV_LOG_ERROR,
675                    "The specified picture size of %dx%d is not valid for the "
676                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
677                     s->width, s->height);
678             return -1;
679         }
680         s->out_format = FMT_H261;
681         avctx->delay  = 0;
682         s->low_delay  = 1;
683         break;
684     case CODEC_ID_H263:
685         if (!CONFIG_H263_ENCODER)
686             return -1;
687         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
688                              s->width, s->height) == 8) {
689             av_log(avctx, AV_LOG_ERROR,
690                    "The specified picture size of %dx%d is not valid for "
691                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
692                    "352x288, 704x576, and 1408x1152. "
693                    "Try H.263+.\n", s->width, s->height);
694             return -1;
695         }
696         s->out_format = FMT_H263;
697         avctx->delay  = 0;
698         s->low_delay  = 1;
699         break;
700     case CODEC_ID_H263P:
701         s->out_format = FMT_H263;
702         s->h263_plus  = 1;
703         /* Fx */
704         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
705         s->modified_quant  = s->h263_aic;
706         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
707         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
708
709         /* /Fx */
710         /* These are just to be sure */
711         avctx->delay = 0;
712         s->low_delay = 1;
713         break;
714     case CODEC_ID_FLV1:
715         s->out_format      = FMT_H263;
716         s->h263_flv        = 2; /* format = 1; 11-bit codes */
717         s->unrestricted_mv = 1;
718         s->rtp_mode  = 0; /* don't allow GOB */
719         avctx->delay = 0;
720         s->low_delay = 1;
721         break;
722     case CODEC_ID_RV10:
723         s->out_format = FMT_H263;
724         avctx->delay  = 0;
725         s->low_delay  = 1;
726         break;
727     case CODEC_ID_RV20:
728         s->out_format      = FMT_H263;
729         avctx->delay       = 0;
730         s->low_delay       = 1;
731         s->modified_quant  = 1;
732         s->h263_aic        = 1;
733         s->h263_plus       = 1;
734         s->loop_filter     = 1;
735         s->unrestricted_mv = 0;
736         break;
737     case CODEC_ID_MPEG4:
738         s->out_format      = FMT_H263;
739         s->h263_pred       = 1;
740         s->unrestricted_mv = 1;
741         s->low_delay       = s->max_b_frames ? 0 : 1;
742         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
743         break;
744     case CODEC_ID_MSMPEG4V2:
745         s->out_format      = FMT_H263;
746         s->h263_pred       = 1;
747         s->unrestricted_mv = 1;
748         s->msmpeg4_version = 2;
749         avctx->delay       = 0;
750         s->low_delay       = 1;
751         break;
752     case CODEC_ID_MSMPEG4V3:
753         s->out_format        = FMT_H263;
754         s->h263_pred         = 1;
755         s->unrestricted_mv   = 1;
756         s->msmpeg4_version   = 3;
757         s->flipflop_rounding = 1;
758         avctx->delay         = 0;
759         s->low_delay         = 1;
760         break;
761     case CODEC_ID_WMV1:
762         s->out_format        = FMT_H263;
763         s->h263_pred         = 1;
764         s->unrestricted_mv   = 1;
765         s->msmpeg4_version   = 4;
766         s->flipflop_rounding = 1;
767         avctx->delay         = 0;
768         s->low_delay         = 1;
769         break;
770     case CODEC_ID_WMV2:
771         s->out_format        = FMT_H263;
772         s->h263_pred         = 1;
773         s->unrestricted_mv   = 1;
774         s->msmpeg4_version   = 5;
775         s->flipflop_rounding = 1;
776         avctx->delay         = 0;
777         s->low_delay         = 1;
778         break;
779     default:
780         return -1;
781     }
782
783     avctx->has_b_frames = !s->low_delay;
784
785     s->encoding = 1;
786
787     s->progressive_frame    =
788     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
789                                                 CODEC_FLAG_INTERLACED_ME) ||
790                                 s->alternate_scan);
791
792     /* init */
793     if (ff_MPV_common_init(s) < 0)
794         return -1;
795
796     if (!s->dct_quantize)
797         s->dct_quantize = ff_dct_quantize_c;
798     if (!s->denoise_dct)
799         s->denoise_dct  = denoise_dct_c;
800     s->fast_dct_quantize = s->dct_quantize;
801     if (avctx->trellis)
802         s->dct_quantize  = dct_quantize_trellis_c;
803
804     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
805         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
806
807     s->quant_precision = 5;
808
809     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
810     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
811
812     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
813         ff_h261_encode_init(s);
814     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
815         ff_h263_encode_init(s);
816     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
817         ff_msmpeg4_encode_init(s);
818     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
819         && s->out_format == FMT_MPEG1)
820         ff_mpeg1_encode_init(s);
821
822     /* init q matrix */
823     for (i = 0; i < 64; i++) {
824         int j = s->dsp.idct_permutation[i];
825         if (CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4 &&
826             s->mpeg_quant) {
827             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
828             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
829         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
830             s->intra_matrix[j] =
831             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
832         } else {
833             /* mpeg1/2 */
834             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
835             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
836         }
837         if (s->avctx->intra_matrix)
838             s->intra_matrix[j] = s->avctx->intra_matrix[i];
839         if (s->avctx->inter_matrix)
840             s->inter_matrix[j] = s->avctx->inter_matrix[i];
841     }
842
843     /* precompute matrix */
844     /* for mjpeg, we do include qscale in the matrix */
845     if (s->out_format != FMT_MJPEG) {
846         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
847                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
848                           31, 1);
849         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
850                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
851                           31, 0);
852     }
853
854     if (ff_rate_control_init(s) < 0)
855         return -1;
856
857     return 0;
858 }
859
860 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
861 {
862     MpegEncContext *s = avctx->priv_data;
863
864     ff_rate_control_uninit(s);
865
866     ff_MPV_common_end(s);
867     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
868         s->out_format == FMT_MJPEG)
869         ff_mjpeg_encode_close(s);
870
871     av_freep(&avctx->extradata);
872
873     return 0;
874 }
875
/**
 * Sum of absolute differences between a 16x16 pixel block and a constant.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel is compared against
 * @param stride offset, in bytes, from one row of the block to the next
 * @return sum over all 256 pixels of |pixel - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++)
            total += FFABS(line[col] - ref);
    }

    return total;
}
889
890 static int get_intra_count(MpegEncContext *s, uint8_t *src,
891                            uint8_t *ref, int stride)
892 {
893     int x, y, w, h;
894     int acc = 0;
895
896     w = s->width  & ~15;
897     h = s->height & ~15;
898
899     for (y = 0; y < h; y += 16) {
900         for (x = 0; x < w; x += 16) {
901             int offset = x + y * stride;
902             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
903                                      16);
904             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
905             int sae  = get_sae(src + offset, mean, stride);
906
907             acc += sae + 500 < sad;
908         }
909     }
910     return acc;
911 }
912
913
914 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
915 {
916     AVFrame *pic = NULL;
917     int64_t pts;
918     int i;
919     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
920                                                  (s->low_delay ? 0 : 1);
921     int direct = 1;
922
923     if (pic_arg) {
924         pts = pic_arg->pts;
925         pic_arg->display_picture_number = s->input_picture_number++;
926
927         if (pts != AV_NOPTS_VALUE) {
928             if (s->user_specified_pts != AV_NOPTS_VALUE) {
929                 int64_t time = pts;
930                 int64_t last = s->user_specified_pts;
931
932                 if (time <= last) {
933                     av_log(s->avctx, AV_LOG_ERROR,
934                            "Error, Invalid timestamp=%"PRId64", "
935                            "last=%"PRId64"\n", pts, s->user_specified_pts);
936                     return -1;
937                 }
938
939                 if (!s->low_delay && pic_arg->display_picture_number == 1)
940                     s->dts_delta = time - last;
941             }
942             s->user_specified_pts = pts;
943         } else {
944             if (s->user_specified_pts != AV_NOPTS_VALUE) {
945                 s->user_specified_pts =
946                 pts = s->user_specified_pts + 1;
947                 av_log(s->avctx, AV_LOG_INFO,
948                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
949                        pts);
950             } else {
951                 pts = pic_arg->display_picture_number;
952             }
953         }
954     }
955
956   if (pic_arg) {
957     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
958         direct = 0;
959     if (pic_arg->linesize[0] != s->linesize)
960         direct = 0;
961     if (pic_arg->linesize[1] != s->uvlinesize)
962         direct = 0;
963     if (pic_arg->linesize[2] != s->uvlinesize)
964         direct = 0;
965
966     //av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0],
967     //       pic_arg->linesize[1], s->linesize, s->uvlinesize);
968
969     if (direct) {
970         i = ff_find_unused_picture(s, 1);
971         if (i < 0)
972             return i;
973
974         pic = &s->picture[i].f;
975         pic->reference = 3;
976
977         for (i = 0; i < 4; i++) {
978             pic->data[i]     = pic_arg->data[i];
979             pic->linesize[i] = pic_arg->linesize[i];
980         }
981         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
982             return -1;
983         }
984     } else {
985         i = ff_find_unused_picture(s, 0);
986         if (i < 0)
987             return i;
988
989         pic = &s->picture[i].f;
990         pic->reference = 3;
991
992         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
993             return -1;
994         }
995
996         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
997             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
998             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
999             // empty
1000         } else {
1001             int h_chroma_shift, v_chroma_shift;
1002             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
1003                                           &v_chroma_shift);
1004
1005             for (i = 0; i < 3; i++) {
1006                 int src_stride = pic_arg->linesize[i];
1007                 int dst_stride = i ? s->uvlinesize : s->linesize;
1008                 int h_shift = i ? h_chroma_shift : 0;
1009                 int v_shift = i ? v_chroma_shift : 0;
1010                 int w = s->width  >> h_shift;
1011                 int h = s->height >> v_shift;
1012                 uint8_t *src = pic_arg->data[i];
1013                 uint8_t *dst = pic->data[i];
1014
1015                 if(s->codec_id == CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)){
1016                     h= ((s->height+15)/16*16)>>v_shift;
1017                 }
1018
1019                 if (!s->avctx->rc_buffer_size)
1020                     dst += INPLACE_OFFSET;
1021
1022                 if (src_stride == dst_stride)
1023                     memcpy(dst, src, src_stride * h);
1024                 else {
1025                     while (h--) {
1026                         memcpy(dst, src, w);
1027                         dst += dst_stride;
1028                         src += src_stride;
1029                     }
1030                 }
1031             }
1032         }
1033     }
1034     copy_picture_attributes(s, pic, pic_arg);
1035     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1036   }
1037
1038     /* shift buffer entries */
1039     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1040         s->input_picture[i - 1] = s->input_picture[i];
1041
1042     s->input_picture[encoding_delay] = (Picture*) pic;
1043
1044     return 0;
1045 }
1046
1047 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1048 {
1049     int x, y, plane;
1050     int score = 0;
1051     int64_t score64 = 0;
1052
1053     for (plane = 0; plane < 3; plane++) {
1054         const int stride = p->f.linesize[plane];
1055         const int bw = plane ? 1 : 2;
1056         for (y = 0; y < s->mb_height * bw; y++) {
1057             for (x = 0; x < s->mb_width * bw; x++) {
1058                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1059                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1060                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1061                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1062
1063                 switch (s->avctx->frame_skip_exp) {
1064                 case 0: score    =  FFMAX(score, v);          break;
1065                 case 1: score   += FFABS(v);                  break;
1066                 case 2: score   += v * v;                     break;
1067                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1068                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1069                 }
1070             }
1071         }
1072     }
1073
1074     if (score)
1075         score64 = score;
1076
1077     if (score64 < s->avctx->frame_skip_threshold)
1078         return 1;
1079     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1080         return 1;
1081     return 0;
1082 }
1083
1084 static int estimate_best_b_count(MpegEncContext *s)
1085 {
1086     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1087     AVCodecContext *c = avcodec_alloc_context3(NULL);
1088     AVFrame input[FF_MAX_B_FRAMES + 2];
1089     const int scale = s->avctx->brd_scale;
1090     int i, j, out_size, p_lambda, b_lambda, lambda2;
1091     int outbuf_size  = s->width * s->height; // FIXME
1092     uint8_t *outbuf  = av_malloc(outbuf_size);
1093     int64_t best_rd  = INT64_MAX;
1094     int best_b_count = -1;
1095
1096     assert(scale >= 0 && scale <= 3);
1097
1098     //emms_c();
1099     //s->next_picture_ptr->quality;
1100     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1101     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1102     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1103     if (!b_lambda) // FIXME we should do this somewhere else
1104         b_lambda = p_lambda;
1105     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1106                FF_LAMBDA_SHIFT;
1107
1108     c->width        = s->width  >> scale;
1109     c->height       = s->height >> scale;
1110     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1111                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1112     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1113     c->mb_decision  = s->avctx->mb_decision;
1114     c->me_cmp       = s->avctx->me_cmp;
1115     c->mb_cmp       = s->avctx->mb_cmp;
1116     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1117     c->pix_fmt      = PIX_FMT_YUV420P;
1118     c->time_base    = s->avctx->time_base;
1119     c->max_b_frames = s->max_b_frames;
1120
1121     if (avcodec_open2(c, codec, NULL) < 0)
1122         return -1;
1123
1124     for (i = 0; i < s->max_b_frames + 2; i++) {
1125         int ysize = c->width * c->height;
1126         int csize = (c->width / 2) * (c->height / 2);
1127         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1128                                                 s->next_picture_ptr;
1129
1130         avcodec_get_frame_defaults(&input[i]);
1131         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1132         input[i].data[1]     = input[i].data[0] + ysize;
1133         input[i].data[2]     = input[i].data[1] + csize;
1134         input[i].linesize[0] = c->width;
1135         input[i].linesize[1] =
1136         input[i].linesize[2] = c->width / 2;
1137
1138         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1139             pre_input = *pre_input_ptr;
1140
1141             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1142                 pre_input.f.data[0] += INPLACE_OFFSET;
1143                 pre_input.f.data[1] += INPLACE_OFFSET;
1144                 pre_input.f.data[2] += INPLACE_OFFSET;
1145             }
1146
1147             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1148                                  pre_input.f.data[0], pre_input.f.linesize[0],
1149                                  c->width,      c->height);
1150             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1151                                  pre_input.f.data[1], pre_input.f.linesize[1],
1152                                  c->width >> 1, c->height >> 1);
1153             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1154                                  pre_input.f.data[2], pre_input.f.linesize[2],
1155                                  c->width >> 1, c->height >> 1);
1156         }
1157     }
1158
1159     for (j = 0; j < s->max_b_frames + 1; j++) {
1160         int64_t rd = 0;
1161
1162         if (!s->input_picture[j])
1163             break;
1164
1165         c->error[0] = c->error[1] = c->error[2] = 0;
1166
1167         input[0].pict_type = AV_PICTURE_TYPE_I;
1168         input[0].quality   = 1 * FF_QP2LAMBDA;
1169         out_size           = avcodec_encode_video(c, outbuf,
1170                                                   outbuf_size, &input[0]);
1171         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1172
1173         for (i = 0; i < s->max_b_frames + 1; i++) {
1174             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1175
1176             input[i + 1].pict_type = is_p ?
1177                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1178             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1179             out_size = avcodec_encode_video(c, outbuf, outbuf_size,
1180                                             &input[i + 1]);
1181             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1182         }
1183
1184         /* get the delayed frames */
1185         while (out_size) {
1186             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
1187             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1188         }
1189
1190         rd += c->error[0] + c->error[1] + c->error[2];
1191
1192         if (rd < best_rd) {
1193             best_rd = rd;
1194             best_b_count = j;
1195         }
1196     }
1197
1198     av_freep(&outbuf);
1199     avcodec_close(c);
1200     av_freep(&c);
1201
1202     for (i = 0; i < s->max_b_frames + 2; i++) {
1203         av_freep(&input[i].data[0]);
1204     }
1205
1206     return best_b_count;
1207 }
1208
1209 static int select_input_picture(MpegEncContext *s)
1210 {
1211     int i;
1212
1213     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1214         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1215     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1216
1217     /* set next picture type & ordering */
1218     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1219         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1220             s->next_picture_ptr == NULL || s->intra_only) {
1221             s->reordered_input_picture[0] = s->input_picture[0];
1222             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1223             s->reordered_input_picture[0]->f.coded_picture_number =
1224                 s->coded_picture_number++;
1225         } else {
1226             int b_frames;
1227
1228             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1229                 if (s->picture_in_gop_number < s->gop_size &&
1230                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1231                     // FIXME check that te gop check above is +-1 correct
1232                     //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n",
1233                     //       s->input_picture[0]->f.data[0],
1234                     //       s->input_picture[0]->pts);
1235
1236                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1237                         for (i = 0; i < 4; i++)
1238                             s->input_picture[0]->f.data[i] = NULL;
1239                         s->input_picture[0]->f.type = 0;
1240                     } else {
1241                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1242                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1243
1244                         s->avctx->release_buffer(s->avctx,
1245                                                  &s->input_picture[0]->f);
1246                     }
1247
1248                     emms_c();
1249                     ff_vbv_update(s, 0);
1250
1251                     goto no_output_pic;
1252                 }
1253             }
1254
1255             if (s->flags & CODEC_FLAG_PASS2) {
1256                 for (i = 0; i < s->max_b_frames + 1; i++) {
1257                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1258
1259                     if (pict_num >= s->rc_context.num_entries)
1260                         break;
1261                     if (!s->input_picture[i]) {
1262                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1263                         break;
1264                     }
1265
1266                     s->input_picture[i]->f.pict_type =
1267                         s->rc_context.entry[pict_num].new_pict_type;
1268                 }
1269             }
1270
1271             if (s->avctx->b_frame_strategy == 0) {
1272                 b_frames = s->max_b_frames;
1273                 while (b_frames && !s->input_picture[b_frames])
1274                     b_frames--;
1275             } else if (s->avctx->b_frame_strategy == 1) {
1276                 for (i = 1; i < s->max_b_frames + 1; i++) {
1277                     if (s->input_picture[i] &&
1278                         s->input_picture[i]->b_frame_score == 0) {
1279                         s->input_picture[i]->b_frame_score =
1280                             get_intra_count(s,
1281                                             s->input_picture[i    ]->f.data[0],
1282                                             s->input_picture[i - 1]->f.data[0],
1283                                             s->linesize) + 1;
1284                     }
1285                 }
1286                 for (i = 0; i < s->max_b_frames + 1; i++) {
1287                     if (s->input_picture[i] == NULL ||
1288                         s->input_picture[i]->b_frame_score - 1 >
1289                             s->mb_num / s->avctx->b_sensitivity)
1290                         break;
1291                 }
1292
1293                 b_frames = FFMAX(0, i - 1);
1294
1295                 /* reset scores */
1296                 for (i = 0; i < b_frames + 1; i++) {
1297                     s->input_picture[i]->b_frame_score = 0;
1298                 }
1299             } else if (s->avctx->b_frame_strategy == 2) {
1300                 b_frames = estimate_best_b_count(s);
1301             } else {
1302                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1303                 b_frames = 0;
1304             }
1305
1306             emms_c();
1307             //static int b_count = 0;
1308             //b_count += b_frames;
1309             //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1310
1311             for (i = b_frames - 1; i >= 0; i--) {
1312                 int type = s->input_picture[i]->f.pict_type;
1313                 if (type && type != AV_PICTURE_TYPE_B)
1314                     b_frames = i;
1315             }
1316             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1317                 b_frames == s->max_b_frames) {
1318                 av_log(s->avctx, AV_LOG_ERROR,
1319                        "warning, too many b frames in a row\n");
1320             }
1321
1322             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1323                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1324                     s->gop_size > s->picture_in_gop_number) {
1325                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1326                 } else {
1327                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1328                         b_frames = 0;
1329                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1330                 }
1331             }
1332
1333             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1334                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1335                 b_frames--;
1336
1337             s->reordered_input_picture[0] = s->input_picture[b_frames];
1338             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1339                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1340             s->reordered_input_picture[0]->f.coded_picture_number =
1341                 s->coded_picture_number++;
1342             for (i = 0; i < b_frames; i++) {
1343                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1344                 s->reordered_input_picture[i + 1]->f.pict_type =
1345                     AV_PICTURE_TYPE_B;
1346                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1347                     s->coded_picture_number++;
1348             }
1349         }
1350     }
1351 no_output_pic:
1352     if (s->reordered_input_picture[0]) {
1353         s->reordered_input_picture[0]->f.reference =
1354            s->reordered_input_picture[0]->f.pict_type !=
1355                AV_PICTURE_TYPE_B ? 3 : 0;
1356
1357         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1358
1359         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1360             s->avctx->rc_buffer_size) {
1361             // input is a shared pix, so we can't modifiy it -> alloc a new
1362             // one & ensure that the shared one is reuseable
1363
1364             Picture *pic;
1365             int i = ff_find_unused_picture(s, 0);
1366             if (i < 0)
1367                 return i;
1368             pic = &s->picture[i];
1369
1370             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1371             if (ff_alloc_picture(s, pic, 0) < 0) {
1372                 return -1;
1373             }
1374
1375             /* mark us unused / free shared pic */
1376             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1377                 s->avctx->release_buffer(s->avctx,
1378                                          &s->reordered_input_picture[0]->f);
1379             for (i = 0; i < 4; i++)
1380                 s->reordered_input_picture[0]->f.data[i] = NULL;
1381             s->reordered_input_picture[0]->f.type = 0;
1382
1383             copy_picture_attributes(s, &pic->f,
1384                                     &s->reordered_input_picture[0]->f);
1385
1386             s->current_picture_ptr = pic;
1387         } else {
1388             // input is not a shared pix -> reuse buffer for current_pix
1389
1390             assert(s->reordered_input_picture[0]->f.type ==
1391                        FF_BUFFER_TYPE_USER ||
1392                    s->reordered_input_picture[0]->f.type ==
1393                        FF_BUFFER_TYPE_INTERNAL);
1394
1395             s->current_picture_ptr = s->reordered_input_picture[0];
1396             for (i = 0; i < 4; i++) {
1397                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1398             }
1399         }
1400         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1401
1402         s->picture_number = s->new_picture.f.display_picture_number;
1403         //printf("dpn:%d\n", s->picture_number);
1404     } else {
1405         memset(&s->new_picture, 0, sizeof(Picture));
1406     }
1407     return 0;
1408 }
1409
1410 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1411                           AVFrame *pic_arg, int *got_packet)
1412 {
1413     MpegEncContext *s = avctx->priv_data;
1414     int i, stuffing_count, ret;
1415     int context_count = s->slice_context_count;
1416
1417     s->picture_in_gop_number++;
1418
1419     if (load_input_picture(s, pic_arg) < 0)
1420         return -1;
1421
1422     if (select_input_picture(s) < 0) {
1423         return -1;
1424     }
1425
1426     /* output? */
1427     if (s->new_picture.f.data[0]) {
1428         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1429             return ret;
1430         if (s->mb_info) {
1431             s->mb_info_ptr = av_packet_new_side_data(pkt,
1432                                  AV_PKT_DATA_H263_MB_INFO,
1433                                  s->mb_width*s->mb_height*12);
1434             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1435         }
1436
1437         for (i = 0; i < context_count; i++) {
1438             int start_y = s->thread_context[i]->start_mb_y;
1439             int   end_y = s->thread_context[i]->  end_mb_y;
1440             int h       = s->mb_height;
1441             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1442             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1443
1444             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1445         }
1446
1447         s->pict_type = s->new_picture.f.pict_type;
1448         //emms_c();
1449         //printf("qs:%f %f %d\n", s->new_picture.quality,
1450         //       s->current_picture.quality, s->qscale);
1451         ff_MPV_frame_start(s, avctx);
1452 vbv_retry:
1453         if (encode_picture(s, s->picture_number) < 0)
1454             return -1;
1455
1456         avctx->header_bits = s->header_bits;
1457         avctx->mv_bits     = s->mv_bits;
1458         avctx->misc_bits   = s->misc_bits;
1459         avctx->i_tex_bits  = s->i_tex_bits;
1460         avctx->p_tex_bits  = s->p_tex_bits;
1461         avctx->i_count     = s->i_count;
1462         // FIXME f/b_count in avctx
1463         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1464         avctx->skip_count  = s->skip_count;
1465
1466         ff_MPV_frame_end(s);
1467
1468         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1469             ff_mjpeg_encode_picture_trailer(s);
1470
1471         if (avctx->rc_buffer_size) {
1472             RateControlContext *rcc = &s->rc_context;
1473             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1474
1475             if (put_bits_count(&s->pb) > max_size &&
1476                 s->lambda < s->avctx->lmax) {
1477                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1478                                        (s->qscale + 1) / s->qscale);
1479                 if (s->adaptive_quant) {
1480                     int i;
1481                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1482                         s->lambda_table[i] =
1483                             FFMAX(s->lambda_table[i] + 1,
1484                                   s->lambda_table[i] * (s->qscale + 1) /
1485                                   s->qscale);
1486                 }
1487                 s->mb_skipped = 0;        // done in MPV_frame_start()
1488                 // done in encode_picture() so we must undo it
1489                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1490                     if (s->flipflop_rounding          ||
1491                         s->codec_id == CODEC_ID_H263P ||
1492                         s->codec_id == CODEC_ID_MPEG4)
1493                         s->no_rounding ^= 1;
1494                 }
1495                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1496                     s->time_base       = s->last_time_base;
1497                     s->last_non_b_time = s->time - s->pp_time;
1498                 }
1499                 //av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
1500                 for (i = 0; i < context_count; i++) {
1501                     PutBitContext *pb = &s->thread_context[i]->pb;
1502                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1503                 }
1504                 goto vbv_retry;
1505             }
1506
1507             assert(s->avctx->rc_max_rate);
1508         }
1509
1510         if (s->flags & CODEC_FLAG_PASS1)
1511             ff_write_pass1_stats(s);
1512
1513         for (i = 0; i < 4; i++) {
1514             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1515             avctx->error[i] += s->current_picture_ptr->f.error[i];
1516         }
1517
1518         if (s->flags & CODEC_FLAG_PASS1)
1519             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1520                    avctx->i_tex_bits + avctx->p_tex_bits ==
1521                        put_bits_count(&s->pb));
1522         flush_put_bits(&s->pb);
1523         s->frame_bits  = put_bits_count(&s->pb);
1524
1525         stuffing_count = ff_vbv_update(s, s->frame_bits);
1526         if (stuffing_count) {
1527             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1528                     stuffing_count + 50) {
1529                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1530                 return -1;
1531             }
1532
1533             switch (s->codec_id) {
1534             case CODEC_ID_MPEG1VIDEO:
1535             case CODEC_ID_MPEG2VIDEO:
1536                 while (stuffing_count--) {
1537                     put_bits(&s->pb, 8, 0);
1538                 }
1539             break;
1540             case CODEC_ID_MPEG4:
1541                 put_bits(&s->pb, 16, 0);
1542                 put_bits(&s->pb, 16, 0x1C3);
1543                 stuffing_count -= 4;
1544                 while (stuffing_count--) {
1545                     put_bits(&s->pb, 8, 0xFF);
1546                 }
1547             break;
1548             default:
1549                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1550             }
1551             flush_put_bits(&s->pb);
1552             s->frame_bits  = put_bits_count(&s->pb);
1553         }
1554
1555         /* update mpeg1/2 vbv_delay for CBR */
1556         if (s->avctx->rc_max_rate                          &&
1557             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1558             s->out_format == FMT_MPEG1                     &&
1559             90000LL * (avctx->rc_buffer_size - 1) <=
1560                 s->avctx->rc_max_rate * 0xFFFFLL) {
1561             int vbv_delay, min_delay;
1562             double inbits  = s->avctx->rc_max_rate *
1563                              av_q2d(s->avctx->time_base);
1564             int    minbits = s->frame_bits - 8 *
1565                              (s->vbv_delay_ptr - s->pb.buf - 1);
1566             double bits    = s->rc_context.buffer_index + minbits - inbits;
1567
1568             if (bits < 0)
1569                 av_log(s->avctx, AV_LOG_ERROR,
1570                        "Internal error, negative bits\n");
1571
1572             assert(s->repeat_first_field == 0);
1573
1574             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1575             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1576                         s->avctx->rc_max_rate;
1577
1578             vbv_delay = FFMAX(vbv_delay, min_delay);
1579
1580             assert(vbv_delay < 0xFFFF);
1581
1582             s->vbv_delay_ptr[0] &= 0xF8;
1583             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1584             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1585             s->vbv_delay_ptr[2] &= 0x07;
1586             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1587             avctx->vbv_delay     = vbv_delay * 300;
1588         }
1589         s->total_bits     += s->frame_bits;
1590         avctx->frame_bits  = s->frame_bits;
1591
1592         pkt->pts = s->current_picture.f.pts;
1593         if (!s->low_delay) {
1594             if (!s->current_picture.f.coded_picture_number)
1595                 pkt->dts = pkt->pts - s->dts_delta;
1596             else
1597                 pkt->dts = s->reordered_pts;
1598             s->reordered_pts = s->input_picture[0]->f.pts;
1599         } else
1600             pkt->dts = pkt->pts;
1601         if (s->current_picture.f.key_frame)
1602             pkt->flags |= AV_PKT_FLAG_KEY;
1603         if (s->mb_info)
1604             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1605     } else {
1606         assert((put_bits_ptr(&s->pb) == s->pb.buf));
1607         s->frame_bits = 0;
1608     }
1609     assert((s->frame_bits & 7) == 0);
1610
1611     pkt->size = s->frame_bits / 8;
1612     *got_packet = !!pkt->size;
1613     return 0;
1614 }
1615
1616 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1617                                                 int n, int threshold)
1618 {
1619     static const char tab[64] = {
1620         3, 2, 2, 1, 1, 1, 1, 1,
1621         1, 1, 1, 1, 1, 1, 1, 1,
1622         1, 1, 1, 1, 1, 1, 1, 1,
1623         0, 0, 0, 0, 0, 0, 0, 0,
1624         0, 0, 0, 0, 0, 0, 0, 0,
1625         0, 0, 0, 0, 0, 0, 0, 0,
1626         0, 0, 0, 0, 0, 0, 0, 0,
1627         0, 0, 0, 0, 0, 0, 0, 0
1628     };
1629     int score = 0;
1630     int run = 0;
1631     int i;
1632     DCTELEM *block = s->block[n];
1633     const int last_index = s->block_last_index[n];
1634     int skip_dc;
1635
1636     if (threshold < 0) {
1637         skip_dc = 0;
1638         threshold = -threshold;
1639     } else
1640         skip_dc = 1;
1641
1642     /* Are all we could set to zero already zero? */
1643     if (last_index <= skip_dc - 1)
1644         return;
1645
1646     for (i = 0; i <= last_index; i++) {
1647         const int j = s->intra_scantable.permutated[i];
1648         const int level = FFABS(block[j]);
1649         if (level == 1) {
1650             if (skip_dc && i == 0)
1651                 continue;
1652             score += tab[run];
1653             run = 0;
1654         } else if (level > 1) {
1655             return;
1656         } else {
1657             run++;
1658         }
1659     }
1660     if (score >= threshold)
1661         return;
1662     for (i = skip_dc; i <= last_index; i++) {
1663         const int j = s->intra_scantable.permutated[i];
1664         block[j] = 0;
1665     }
1666     if (block[0])
1667         s->block_last_index[n] = 0;
1668     else
1669         s->block_last_index[n] = -1;
1670 }
1671
1672 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1673                                int last_index)
1674 {
1675     int i;
1676     const int maxlevel = s->max_qcoeff;
1677     const int minlevel = s->min_qcoeff;
1678     int overflow = 0;
1679
1680     if (s->mb_intra) {
1681         i = 1; // skip clipping of intra dc
1682     } else
1683         i = 0;
1684
1685     for (; i <= last_index; i++) {
1686         const int j = s->intra_scantable.permutated[i];
1687         int level = block[j];
1688
1689         if (level > maxlevel) {
1690             level = maxlevel;
1691             overflow++;
1692         } else if (level < minlevel) {
1693             level = minlevel;
1694             overflow++;
1695         }
1696
1697         block[j] = level;
1698     }
1699
1700     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1701         av_log(s->avctx, AV_LOG_INFO,
1702                "warning, clipping %d dct coefficients to %d..%d\n",
1703                overflow, minlevel, maxlevel);
1704 }
1705
/**
 * Fill an 8x8 weight table from local pixel activity.
 *
 * For every pixel the 3x3 neighbourhood (clipped to the 8x8 block) is
 * scanned; with N samples, sum S and sum of squares Q the stored weight is
 * 36 * ff_sqrt(N * Q - S * S) / N.
 *
 * @param weight output table of 64 entries, row-major
 * @param ptr    top-left pixel of the 8x8 source block
 * @param stride distance in bytes between consecutive source rows
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);   /* exclusive */

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2); /* exclusive */
            const int n     = (bottom - top) * (right - left);
            int total    = 0;
            int total_sq = 0;
            int yy, xx;

            for (yy = top; yy < bottom; yy++) {
                const uint8_t *line = ptr + yy * stride;

                for (xx = left; xx < right; xx++) {
                    const int pix = line[xx];

                    total    += pix;
                    total_sq += pix * pix;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(n * total_sq - total * total)) / n;
        }
    }
}
1729
1730 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1731                                                 int motion_x, int motion_y,
1732                                                 int mb_block_height,
1733                                                 int mb_block_count)
1734 {
1735     int16_t weight[8][64];
1736     DCTELEM orig[8][64];
1737     const int mb_x = s->mb_x;
1738     const int mb_y = s->mb_y;
1739     int i;
1740     int skip_dct[8];
1741     int dct_offset = s->linesize * 8; // default for progressive frames
1742     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1743     int wrap_y, wrap_c;
1744
1745     for (i = 0; i < mb_block_count; i++)
1746         skip_dct[i] = s->skipdct;
1747
1748     if (s->adaptive_quant) {
1749         const int last_qp = s->qscale;
1750         const int mb_xy = mb_x + mb_y * s->mb_stride;
1751
1752         s->lambda = s->lambda_table[mb_xy];
1753         update_qscale(s);
1754
1755         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1756             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1757             s->dquant = s->qscale - last_qp;
1758
1759             if (s->out_format == FMT_H263) {
1760                 s->dquant = av_clip(s->dquant, -2, 2);
1761
1762                 if (s->codec_id == CODEC_ID_MPEG4) {
1763                     if (!s->mb_intra) {
1764                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1765                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1766                                 s->dquant = 0;
1767                         }
1768                         if (s->mv_type == MV_TYPE_8X8)
1769                             s->dquant = 0;
1770                     }
1771                 }
1772             }
1773         }
1774         ff_set_qscale(s, last_qp + s->dquant);
1775     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1776         ff_set_qscale(s, s->qscale + s->dquant);
1777
1778     wrap_y = s->linesize;
1779     wrap_c = s->uvlinesize;
1780     ptr_y  = s->new_picture.f.data[0] +
1781              (mb_y * 16 * wrap_y)              + mb_x * 16;
1782     ptr_cb = s->new_picture.f.data[1] +
1783              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1784     ptr_cr = s->new_picture.f.data[2] +
1785              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1786
1787     if((mb_x*16+16 > s->width || mb_y*16+16 > s->height) && s->codec_id != CODEC_ID_AMV){
1788         uint8_t *ebuf = s->edge_emu_buffer + 32;
1789         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1790                                 mb_y * 16, s->width, s->height);
1791         ptr_y = ebuf;
1792         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1793                                 mb_block_height, mb_x * 8, mb_y * 8,
1794                                 s->width >> 1, s->height >> 1);
1795         ptr_cb = ebuf + 18 * wrap_y;
1796         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1797                                 mb_block_height, mb_x * 8, mb_y * 8,
1798                                 s->width >> 1, s->height >> 1);
1799         ptr_cr = ebuf + 18 * wrap_y + 8;
1800     }
1801
1802     if (s->mb_intra) {
1803         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1804             int progressive_score, interlaced_score;
1805
1806             s->interlaced_dct = 0;
1807             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1808                                                     NULL, wrap_y, 8) +
1809                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1810                                                     NULL, wrap_y, 8) - 400;
1811
1812             if (progressive_score > 0) {
1813                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1814                                                        NULL, wrap_y * 2, 8) +
1815                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1816                                                        NULL, wrap_y * 2, 8);
1817                 if (progressive_score > interlaced_score) {
1818                     s->interlaced_dct = 1;
1819
1820                     dct_offset = wrap_y;
1821                     wrap_y <<= 1;
1822                     if (s->chroma_format == CHROMA_422)
1823                         wrap_c <<= 1;
1824                 }
1825             }
1826         }
1827
1828         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1829         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1830         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1831         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1832
1833         if (s->flags & CODEC_FLAG_GRAY) {
1834             skip_dct[4] = 1;
1835             skip_dct[5] = 1;
1836         } else {
1837             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1838             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1839             if (!s->chroma_y_shift) { /* 422 */
1840                 s->dsp.get_pixels(s->block[6],
1841                                   ptr_cb + (dct_offset >> 1), wrap_c);
1842                 s->dsp.get_pixels(s->block[7],
1843                                   ptr_cr + (dct_offset >> 1), wrap_c);
1844             }
1845         }
1846     } else {
1847         op_pixels_func (*op_pix)[4];
1848         qpel_mc_func (*op_qpix)[16];
1849         uint8_t *dest_y, *dest_cb, *dest_cr;
1850
1851         dest_y  = s->dest[0];
1852         dest_cb = s->dest[1];
1853         dest_cr = s->dest[2];
1854
1855         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1856             op_pix  = s->dsp.put_pixels_tab;
1857             op_qpix = s->dsp.put_qpel_pixels_tab;
1858         } else {
1859             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1860             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1861         }
1862
1863         if (s->mv_dir & MV_DIR_FORWARD) {
1864             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f.data,
1865                        op_pix, op_qpix);
1866             op_pix  = s->dsp.avg_pixels_tab;
1867             op_qpix = s->dsp.avg_qpel_pixels_tab;
1868         }
1869         if (s->mv_dir & MV_DIR_BACKWARD) {
1870             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f.data,
1871                        op_pix, op_qpix);
1872         }
1873
1874         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1875             int progressive_score, interlaced_score;
1876
1877             s->interlaced_dct = 0;
1878             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1879                                                     ptr_y,              wrap_y,
1880                                                     8) +
1881                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1882                                                     ptr_y + wrap_y * 8, wrap_y,
1883                                                     8) - 400;
1884
1885             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1886                 progressive_score -= 400;
1887
1888             if (progressive_score > 0) {
1889                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1890                                                        ptr_y,
1891                                                        wrap_y * 2, 8) +
1892                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1893                                                        ptr_y + wrap_y,
1894                                                        wrap_y * 2, 8);
1895
1896                 if (progressive_score > interlaced_score) {
1897                     s->interlaced_dct = 1;
1898
1899                     dct_offset = wrap_y;
1900                     wrap_y <<= 1;
1901                     if (s->chroma_format == CHROMA_422)
1902                         wrap_c <<= 1;
1903                 }
1904             }
1905         }
1906
1907         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1908         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1909         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1910                            dest_y + dct_offset, wrap_y);
1911         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1912                            dest_y + dct_offset + 8, wrap_y);
1913
1914         if (s->flags & CODEC_FLAG_GRAY) {
1915             skip_dct[4] = 1;
1916             skip_dct[5] = 1;
1917         } else {
1918             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1919             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1920             if (!s->chroma_y_shift) { /* 422 */
1921                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1922                                    dest_cb + (dct_offset >> 1), wrap_c);
1923                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1924                                    dest_cr + (dct_offset >> 1), wrap_c);
1925             }
1926         }
1927         /* pre quantization */
1928         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1929                 2 * s->qscale * s->qscale) {
1930             // FIXME optimize
1931             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1932                               wrap_y, 8) < 20 * s->qscale)
1933                 skip_dct[0] = 1;
1934             if (s->dsp.sad[1](NULL, ptr_y + 8,
1935                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1936                 skip_dct[1] = 1;
1937             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1938                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1939                 skip_dct[2] = 1;
1940             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1941                               dest_y + dct_offset + 8,
1942                               wrap_y, 8) < 20 * s->qscale)
1943                 skip_dct[3] = 1;
1944             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1945                               wrap_c, 8) < 20 * s->qscale)
1946                 skip_dct[4] = 1;
1947             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1948                               wrap_c, 8) < 20 * s->qscale)
1949                 skip_dct[5] = 1;
1950             if (!s->chroma_y_shift) { /* 422 */
1951                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1952                                   dest_cb + (dct_offset >> 1),
1953                                   wrap_c, 8) < 20 * s->qscale)
1954                     skip_dct[6] = 1;
1955                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1956                                   dest_cr + (dct_offset >> 1),
1957                                   wrap_c, 8) < 20 * s->qscale)
1958                     skip_dct[7] = 1;
1959             }
1960         }
1961     }
1962
1963     if (s->quantizer_noise_shaping) {
1964         if (!skip_dct[0])
1965             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1966         if (!skip_dct[1])
1967             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1968         if (!skip_dct[2])
1969             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1970         if (!skip_dct[3])
1971             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1972         if (!skip_dct[4])
1973             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1974         if (!skip_dct[5])
1975             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1976         if (!s->chroma_y_shift) { /* 422 */
1977             if (!skip_dct[6])
1978                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1979                                   wrap_c);
1980             if (!skip_dct[7])
1981                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1982                                   wrap_c);
1983         }
1984         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
1985     }
1986
1987     /* DCT & quantize */
1988     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1989     {
1990         for (i = 0; i < mb_block_count; i++) {
1991             if (!skip_dct[i]) {
1992                 int overflow;
1993                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1994                 // FIXME we could decide to change to quantizer instead of
1995                 // clipping
1996                 // JS: I don't think that would be a good idea it could lower
1997                 //     quality instead of improve it. Just INTRADC clipping
1998                 //     deserves changes in quantizer
1999                 if (overflow)
2000                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2001             } else
2002                 s->block_last_index[i] = -1;
2003         }
2004         if (s->quantizer_noise_shaping) {
2005             for (i = 0; i < mb_block_count; i++) {
2006                 if (!skip_dct[i]) {
2007                     s->block_last_index[i] =
2008                         dct_quantize_refine(s, s->block[i], weight[i],
2009                                             orig[i], i, s->qscale);
2010                 }
2011             }
2012         }
2013
2014         if (s->luma_elim_threshold && !s->mb_intra)
2015             for (i = 0; i < 4; i++)
2016                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2017         if (s->chroma_elim_threshold && !s->mb_intra)
2018             for (i = 4; i < mb_block_count; i++)
2019                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2020
2021         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2022             for (i = 0; i < mb_block_count; i++) {
2023                 if (s->block_last_index[i] == -1)
2024                     s->coded_score[i] = INT_MAX / 256;
2025             }
2026         }
2027     }
2028
2029     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2030         s->block_last_index[4] =
2031         s->block_last_index[5] = 0;
2032         s->block[4][0] =
2033         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2034     }
2035
2036     // non c quantize code returns incorrect block_last_index FIXME
2037     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2038         for (i = 0; i < mb_block_count; i++) {
2039             int j;
2040             if (s->block_last_index[i] > 0) {
2041                 for (j = 63; j > 0; j--) {
2042                     if (s->block[i][s->intra_scantable.permutated[j]])
2043                         break;
2044                 }
2045                 s->block_last_index[i] = j;
2046             }
2047         }
2048     }
2049
2050     /* huffman encode */
2051     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2052     case CODEC_ID_MPEG1VIDEO:
2053     case CODEC_ID_MPEG2VIDEO:
2054         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2055             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2056         break;
2057     case CODEC_ID_MPEG4:
2058         if (CONFIG_MPEG4_ENCODER)
2059             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2060         break;
2061     case CODEC_ID_MSMPEG4V2:
2062     case CODEC_ID_MSMPEG4V3:
2063     case CODEC_ID_WMV1:
2064         if (CONFIG_MSMPEG4_ENCODER)
2065             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2066         break;
2067     case CODEC_ID_WMV2:
2068         if (CONFIG_WMV2_ENCODER)
2069             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2070         break;
2071     case CODEC_ID_H261:
2072         if (CONFIG_H261_ENCODER)
2073             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2074         break;
2075     case CODEC_ID_H263:
2076     case CODEC_ID_H263P:
2077     case CODEC_ID_FLV1:
2078     case CODEC_ID_RV10:
2079     case CODEC_ID_RV20:
2080         if (CONFIG_H263_ENCODER)
2081             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2082         break;
2083     case CODEC_ID_MJPEG:
2084     case CODEC_ID_AMV:
2085         if (CONFIG_MJPEG_ENCODER)
2086             ff_mjpeg_encode_mb(s, s->block);
2087         break;
2088     default:
2089         assert(0);
2090     }
2091 }
2092
2093 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2094 {
2095     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2096     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2097 }
2098
2099 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2100     int i;
2101
2102     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2103
2104     /* mpeg1 */
2105     d->mb_skip_run= s->mb_skip_run;
2106     for(i=0; i<3; i++)
2107         d->last_dc[i] = s->last_dc[i];
2108
2109     /* statistics */
2110     d->mv_bits= s->mv_bits;
2111     d->i_tex_bits= s->i_tex_bits;
2112     d->p_tex_bits= s->p_tex_bits;
2113     d->i_count= s->i_count;
2114     d->f_count= s->f_count;
2115     d->b_count= s->b_count;
2116     d->skip_count= s->skip_count;
2117     d->misc_bits= s->misc_bits;
2118     d->last_bits= 0;
2119
2120     d->mb_skipped= 0;
2121     d->qscale= s->qscale;
2122     d->dquant= s->dquant;
2123
2124     d->esc3_level_length= s->esc3_level_length;
2125 }
2126
2127 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2128     int i;
2129
2130     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2131     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2132
2133     /* mpeg1 */
2134     d->mb_skip_run= s->mb_skip_run;
2135     for(i=0; i<3; i++)
2136         d->last_dc[i] = s->last_dc[i];
2137
2138     /* statistics */
2139     d->mv_bits= s->mv_bits;
2140     d->i_tex_bits= s->i_tex_bits;
2141     d->p_tex_bits= s->p_tex_bits;
2142     d->i_count= s->i_count;
2143     d->f_count= s->f_count;
2144     d->b_count= s->b_count;
2145     d->skip_count= s->skip_count;
2146     d->misc_bits= s->misc_bits;
2147
2148     d->mb_intra= s->mb_intra;
2149     d->mb_skipped= s->mb_skipped;
2150     d->mv_type= s->mv_type;
2151     d->mv_dir= s->mv_dir;
2152     d->pb= s->pb;
2153     if(s->data_partitioning){
2154         d->pb2= s->pb2;
2155         d->tex_pb= s->tex_pb;
2156     }
2157     d->block= s->block;
2158     for(i=0; i<8; i++)
2159         d->block_last_index[i]= s->block_last_index[i];
2160     d->interlaced_dct= s->interlaced_dct;
2161     d->qscale= s->qscale;
2162
2163     d->esc3_level_length= s->esc3_level_length;
2164 }
2165
2166 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2167                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2168                            int *dmin, int *next_block, int motion_x, int motion_y)
2169 {
2170     int score;
2171     uint8_t *dest_backup[3];
2172
2173     copy_context_before_encode(s, backup, type);
2174
2175     s->block= s->blocks[*next_block];
2176     s->pb= pb[*next_block];
2177     if(s->data_partitioning){
2178         s->pb2   = pb2   [*next_block];
2179         s->tex_pb= tex_pb[*next_block];
2180     }
2181
2182     if(*next_block){
2183         memcpy(dest_backup, s->dest, sizeof(s->dest));
2184         s->dest[0] = s->rd_scratchpad;
2185         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2186         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2187         assert(s->linesize >= 32); //FIXME
2188     }
2189
2190     encode_mb(s, motion_x, motion_y);
2191
2192     score= put_bits_count(&s->pb);
2193     if(s->data_partitioning){
2194         score+= put_bits_count(&s->pb2);
2195         score+= put_bits_count(&s->tex_pb);
2196     }
2197
2198     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2199         ff_MPV_decode_mb(s, s->block);
2200
2201         score *= s->lambda2;
2202         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2203     }
2204
2205     if(*next_block){
2206         memcpy(s->dest, dest_backup, sizeof(s->dest));
2207     }
2208
2209     if(score<*dmin){
2210         *dmin= score;
2211         *next_block^=1;
2212
2213         copy_context_after_encode(best, s, type);
2214     }
2215 }
2216
2217 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2218     uint32_t *sq = ff_squareTbl + 256;
2219     int acc=0;
2220     int x,y;
2221
2222     if(w==16 && h==16)
2223         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2224     else if(w==8 && h==8)
2225         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2226
2227     for(y=0; y<h; y++){
2228         for(x=0; x<w; x++){
2229             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2230         }
2231     }
2232
2233     assert(acc>=0);
2234
2235     return acc;
2236 }
2237
2238 static int sse_mb(MpegEncContext *s){
2239     int w= 16;
2240     int h= 16;
2241
2242     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2243     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2244
2245     if(w==16 && h==16)
2246       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2247         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2248                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2249                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2250       }else{
2251         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2252                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2253                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2254       }
2255     else
2256         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2257                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2258                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2259 }
2260
2261 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2262     MpegEncContext *s= *(void**)arg;
2263
2264
2265     s->me.pre_pass=1;
2266     s->me.dia_size= s->avctx->pre_dia_size;
2267     s->first_slice_line=1;
2268     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2269         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2270             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2271         }
2272         s->first_slice_line=0;
2273     }
2274
2275     s->me.pre_pass=0;
2276
2277     return 0;
2278 }
2279
2280 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2281     MpegEncContext *s= *(void**)arg;
2282
2283     ff_check_alignment();
2284
2285     s->me.dia_size= s->avctx->dia_size;
2286     s->first_slice_line=1;
2287     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2288         s->mb_x=0; //for block init below
2289         ff_init_block_index(s);
2290         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2291             s->block_index[0]+=2;
2292             s->block_index[1]+=2;
2293             s->block_index[2]+=2;
2294             s->block_index[3]+=2;
2295
2296             /* compute motion vector & mb_type and store in context */
2297             if(s->pict_type==AV_PICTURE_TYPE_B)
2298                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2299             else
2300                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2301         }
2302         s->first_slice_line=0;
2303     }
2304     return 0;
2305 }
2306
2307 static int mb_var_thread(AVCodecContext *c, void *arg){
2308     MpegEncContext *s= *(void**)arg;
2309     int mb_x, mb_y;
2310
2311     ff_check_alignment();
2312
2313     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2314         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2315             int xx = mb_x * 16;
2316             int yy = mb_y * 16;
2317             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2318             int varc;
2319             int sum = s->dsp.pix_sum(pix, s->linesize);
2320
2321             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2322
2323             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2324             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2325             s->me.mb_var_sum_temp    += varc;
2326         }
2327     }
2328     return 0;
2329 }
2330
2331 static void write_slice_end(MpegEncContext *s){
2332     if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4){
2333         if(s->partitioned_frame){
2334             ff_mpeg4_merge_partitions(s);
2335         }
2336
2337         ff_mpeg4_stuffing(&s->pb);
2338     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2339         ff_mjpeg_encode_stuffing(&s->pb);
2340     }
2341
2342     avpriv_align_put_bits(&s->pb);
2343     flush_put_bits(&s->pb);
2344
2345     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2346         s->misc_bits+= get_bits_diff(s);
2347 }
2348
2349 static void write_mb_info(MpegEncContext *s)
2350 {
2351     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2352     int offset = put_bits_count(&s->pb);
2353     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2354     int gobn = s->mb_y / s->gob_index;
2355     int pred_x, pred_y;
2356     if (CONFIG_H263_ENCODER)
2357         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2358     bytestream_put_le32(&ptr, offset);
2359     bytestream_put_byte(&ptr, s->qscale);
2360     bytestream_put_byte(&ptr, gobn);
2361     bytestream_put_le16(&ptr, mba);
2362     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2363     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2364     /* 4MV not implemented */
2365     bytestream_put_byte(&ptr, 0); /* hmv2 */
2366     bytestream_put_byte(&ptr, 0); /* vmv2 */
2367 }
2368
2369 static void update_mb_info(MpegEncContext *s, int startcode)
2370 {
2371     if (!s->mb_info)
2372         return;
2373     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2374         s->mb_info_size += 12;
2375         s->prev_mb_info = s->last_mb_info;
2376     }
2377     if (startcode) {
2378         s->prev_mb_info = put_bits_count(&s->pb)/8;
2379         /* This might have incremented mb_info_size above, and we return without
2380          * actually writing any info into that slot yet. But in that case,
2381          * this will be called again at the start of the after writing the
2382          * start code, actually writing the mb info. */
2383         return;
2384     }
2385
2386     s->last_mb_info = put_bits_count(&s->pb)/8;
2387     if (!s->mb_info_size)
2388         s->mb_info_size += 12;
2389     write_mb_info(s);
2390 }
2391
2392 static int encode_thread(AVCodecContext *c, void *arg){
2393     MpegEncContext *s= *(void**)arg;
2394     int mb_x, mb_y, pdif = 0;
2395     int chr_h= 16>>s->chroma_y_shift;
2396     int i, j;
2397     MpegEncContext best_s, backup_s;
2398     uint8_t bit_buf[2][MAX_MB_BYTES];
2399     uint8_t bit_buf2[2][MAX_MB_BYTES];
2400     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2401     PutBitContext pb[2], pb2[2], tex_pb[2];
2402 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2403
2404     ff_check_alignment();
2405
2406     for(i=0; i<2; i++){
2407         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2408         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2409         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2410     }
2411
2412     s->last_bits= put_bits_count(&s->pb);
2413     s->mv_bits=0;
2414     s->misc_bits=0;
2415     s->i_tex_bits=0;
2416     s->p_tex_bits=0;
2417     s->i_count=0;
2418     s->f_count=0;
2419     s->b_count=0;
2420     s->skip_count=0;
2421
2422     for(i=0; i<3; i++){
2423         /* init last dc values */
2424         /* note: quant matrix value (8) is implied here */
2425         s->last_dc[i] = 128 << s->intra_dc_precision;
2426
2427         s->current_picture.f.error[i] = 0;
2428     }
2429     if(s->codec_id==CODEC_ID_AMV){
2430         s->last_dc[0] = 128*8/13;
2431         s->last_dc[1] = 128*8/14;
2432         s->last_dc[2] = 128*8/14;
2433     }
2434     s->mb_skip_run = 0;
2435     memset(s->last_mv, 0, sizeof(s->last_mv));
2436
2437     s->last_mv_dir = 0;
2438
2439     switch(s->codec_id){
2440     case CODEC_ID_H263:
2441     case CODEC_ID_H263P:
2442     case CODEC_ID_FLV1:
2443         if (CONFIG_H263_ENCODER)
2444             s->gob_index = ff_h263_get_gob_height(s);
2445         break;
2446     case CODEC_ID_MPEG4:
2447         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2448             ff_mpeg4_init_partitions(s);
2449         break;
2450     }
2451
2452     s->resync_mb_x=0;
2453     s->resync_mb_y=0;
2454     s->first_slice_line = 1;
2455     s->ptr_lastgob = s->pb.buf;
2456     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2457 //    printf("row %d at %X\n", s->mb_y, (int)s);
2458         s->mb_x=0;
2459         s->mb_y= mb_y;
2460
2461         ff_set_qscale(s, s->qscale);
2462         ff_init_block_index(s);
2463
2464         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2465             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2466             int mb_type= s->mb_type[xy];
2467 //            int d;
2468             int dmin= INT_MAX;
2469             int dir;
2470
2471             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2472                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2473                 return -1;
2474             }
2475             if(s->data_partitioning){
2476                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2477                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2478                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2479                     return -1;
2480                 }
2481             }
2482
2483             s->mb_x = mb_x;
2484             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2485             ff_update_block_index(s);
2486
2487             if(CONFIG_H261_ENCODER && s->codec_id == CODEC_ID_H261){
2488                 ff_h261_reorder_mb_index(s);
2489                 xy= s->mb_y*s->mb_stride + s->mb_x;
2490                 mb_type= s->mb_type[xy];
2491             }
2492
2493             /* write gob / video packet header  */
2494             if(s->rtp_mode){
2495                 int current_packet_size, is_gob_start;
2496
2497                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2498
2499                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2500
2501                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2502
2503                 switch(s->codec_id){
2504                 case CODEC_ID_H263:
2505                 case CODEC_ID_H263P:
2506                     if(!s->h263_slice_structured)
2507                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2508                     break;
2509                 case CODEC_ID_MPEG2VIDEO:
2510                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2511                 case CODEC_ID_MPEG1VIDEO:
2512                     if(s->mb_skip_run) is_gob_start=0;
2513                     break;
2514                 }
2515
2516                 if(is_gob_start){
2517                     if(s->start_mb_y != mb_y || mb_x!=0){
2518                         write_slice_end(s);
2519
2520                         if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
2521                             ff_mpeg4_init_partitions(s);
2522                         }
2523                     }
2524
2525                     assert((put_bits_count(&s->pb)&7) == 0);
2526                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2527
2528                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2529                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2530                         int d= 100 / s->avctx->error_rate;
2531                         if(r % d == 0){
2532                             current_packet_size=0;
2533                             s->pb.buf_ptr= s->ptr_lastgob;
2534                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2535                         }
2536                     }
2537
2538                     if (s->avctx->rtp_callback){
2539                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2540                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2541                     }
2542                     update_mb_info(s, 1);
2543
2544                     switch(s->codec_id){
2545                     case CODEC_ID_MPEG4:
2546                         if (CONFIG_MPEG4_ENCODER) {
2547                             ff_mpeg4_encode_video_packet_header(s);
2548                             ff_mpeg4_clean_buffers(s);
2549                         }
2550                     break;
2551                     case CODEC_ID_MPEG1VIDEO:
2552                     case CODEC_ID_MPEG2VIDEO:
2553                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2554                             ff_mpeg1_encode_slice_header(s);
2555                             ff_mpeg1_clean_buffers(s);
2556                         }
2557                     break;
2558                     case CODEC_ID_H263:
2559                     case CODEC_ID_H263P:
2560                         if (CONFIG_H263_ENCODER)
2561                             ff_h263_encode_gob_header(s, mb_y);
2562                     break;
2563                     }
2564
2565                     if(s->flags&CODEC_FLAG_PASS1){
2566                         int bits= put_bits_count(&s->pb);
2567                         s->misc_bits+= bits - s->last_bits;
2568                         s->last_bits= bits;
2569                     }
2570
2571                     s->ptr_lastgob += current_packet_size;
2572                     s->first_slice_line=1;
2573                     s->resync_mb_x=mb_x;
2574                     s->resync_mb_y=mb_y;
2575                 }
2576             }
2577
2578             if(  (s->resync_mb_x   == s->mb_x)
2579                && s->resync_mb_y+1 == s->mb_y){
2580                 s->first_slice_line=0;
2581             }
2582
2583             s->mb_skipped=0;
2584             s->dquant=0; //only for QP_RD
2585
2586             update_mb_info(s, 0);
2587
2588             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2589                 int next_block=0;
2590                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2591
2592                 copy_context_before_encode(&backup_s, s, -1);
2593                 backup_s.pb= s->pb;
2594                 best_s.data_partitioning= s->data_partitioning;
2595                 best_s.partitioned_frame= s->partitioned_frame;
2596                 if(s->data_partitioning){
2597                     backup_s.pb2= s->pb2;
2598                     backup_s.tex_pb= s->tex_pb;
2599                 }
2600
2601                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2602                     s->mv_dir = MV_DIR_FORWARD;
2603                     s->mv_type = MV_TYPE_16X16;
2604                     s->mb_intra= 0;
2605                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2606                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2607                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2608                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2609                 }
2610                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2611                     s->mv_dir = MV_DIR_FORWARD;
2612                     s->mv_type = MV_TYPE_FIELD;
2613                     s->mb_intra= 0;
2614                     for(i=0; i<2; i++){
2615                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2616                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2617                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2618                     }
2619                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2620                                  &dmin, &next_block, 0, 0);
2621                 }
2622                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2623                     s->mv_dir = MV_DIR_FORWARD;
2624                     s->mv_type = MV_TYPE_16X16;
2625                     s->mb_intra= 0;
2626                     s->mv[0][0][0] = 0;
2627                     s->mv[0][0][1] = 0;
2628                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2629                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2630                 }
2631                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2632                     s->mv_dir = MV_DIR_FORWARD;
2633                     s->mv_type = MV_TYPE_8X8;
2634                     s->mb_intra= 0;
2635                     for(i=0; i<4; i++){
2636                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2637                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2638                     }
2639                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2640                                  &dmin, &next_block, 0, 0);
2641                 }
2642                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2643                     s->mv_dir = MV_DIR_FORWARD;
2644                     s->mv_type = MV_TYPE_16X16;
2645                     s->mb_intra= 0;
2646                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2647                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2648                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2649                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2650                 }
2651                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2652                     s->mv_dir = MV_DIR_BACKWARD;
2653                     s->mv_type = MV_TYPE_16X16;
2654                     s->mb_intra= 0;
2655                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2656                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2657                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2658                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2659                 }
2660                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2661                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2662                     s->mv_type = MV_TYPE_16X16;
2663                     s->mb_intra= 0;
2664                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2665                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2666                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2667                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2668                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2669                                  &dmin, &next_block, 0, 0);
2670                 }
2671                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2672                     s->mv_dir = MV_DIR_FORWARD;
2673                     s->mv_type = MV_TYPE_FIELD;
2674                     s->mb_intra= 0;
2675                     for(i=0; i<2; i++){
2676                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2677                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2678                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2679                     }
2680                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2681                                  &dmin, &next_block, 0, 0);
2682                 }
2683                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2684                     s->mv_dir = MV_DIR_BACKWARD;
2685                     s->mv_type = MV_TYPE_FIELD;
2686                     s->mb_intra= 0;
2687                     for(i=0; i<2; i++){
2688                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2689                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2690                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2691                     }
2692                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2693                                  &dmin, &next_block, 0, 0);
2694                 }
2695                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2696                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2697                     s->mv_type = MV_TYPE_FIELD;
2698                     s->mb_intra= 0;
2699                     for(dir=0; dir<2; dir++){
2700                         for(i=0; i<2; i++){
2701                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2702                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2703                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2704                         }
2705                     }
2706                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2707                                  &dmin, &next_block, 0, 0);
2708                 }
2709                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2710                     s->mv_dir = 0;
2711                     s->mv_type = MV_TYPE_16X16;
2712                     s->mb_intra= 1;
2713                     s->mv[0][0][0] = 0;
2714                     s->mv[0][0][1] = 0;
2715                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2716                                  &dmin, &next_block, 0, 0);
2717                     if(s->h263_pred || s->h263_aic){
2718                         if(best_s.mb_intra)
2719                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2720                         else
2721                             ff_clean_intra_table_entries(s); //old mode?
2722                     }
2723                 }
2724
2725                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2726                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2727                         const int last_qp= backup_s.qscale;
2728                         int qpi, qp, dc[6];
2729                         DCTELEM ac[6][16];
2730                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2731                         static const int dquant_tab[4]={-1,1,-2,2};
2732
2733                         assert(backup_s.dquant == 0);
2734
2735                         //FIXME intra
2736                         s->mv_dir= best_s.mv_dir;
2737                         s->mv_type = MV_TYPE_16X16;
2738                         s->mb_intra= best_s.mb_intra;
2739                         s->mv[0][0][0] = best_s.mv[0][0][0];
2740                         s->mv[0][0][1] = best_s.mv[0][0][1];
2741                         s->mv[1][0][0] = best_s.mv[1][0][0];
2742                         s->mv[1][0][1] = best_s.mv[1][0][1];
2743
2744                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2745                         for(; qpi<4; qpi++){
2746                             int dquant= dquant_tab[qpi];
2747                             qp= last_qp + dquant;
2748                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2749                                 continue;
2750                             backup_s.dquant= dquant;
2751                             if(s->mb_intra && s->dc_val[0]){
2752                                 for(i=0; i<6; i++){
2753                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2754                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2755                                 }
2756                             }
2757
2758                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2759                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2760                             if(best_s.qscale != qp){
2761                                 if(s->mb_intra && s->dc_val[0]){
2762                                     for(i=0; i<6; i++){
2763                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2764                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2765                                     }
2766                                 }
2767                             }
2768                         }
2769                     }
2770                 }
2771                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2772                     int mx= s->b_direct_mv_table[xy][0];
2773                     int my= s->b_direct_mv_table[xy][1];
2774
2775                     backup_s.dquant = 0;
2776                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2777                     s->mb_intra= 0;
2778                     ff_mpeg4_set_direct_mv(s, mx, my);
2779                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2780                                  &dmin, &next_block, mx, my);
2781                 }
2782                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2783                     backup_s.dquant = 0;
2784                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2785                     s->mb_intra= 0;
2786                     ff_mpeg4_set_direct_mv(s, 0, 0);
2787                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2788                                  &dmin, &next_block, 0, 0);
2789                 }
2790                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2791                     int coded=0;
2792                     for(i=0; i<6; i++)
2793                         coded |= s->block_last_index[i];
2794                     if(coded){
2795                         int mx,my;
2796                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2797                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2798                             mx=my=0; //FIXME find the one we actually used
2799                             ff_mpeg4_set_direct_mv(s, mx, my);
2800                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2801                             mx= s->mv[1][0][0];
2802                             my= s->mv[1][0][1];
2803                         }else{
2804                             mx= s->mv[0][0][0];
2805                             my= s->mv[0][0][1];
2806                         }
2807
2808                         s->mv_dir= best_s.mv_dir;
2809                         s->mv_type = best_s.mv_type;
2810                         s->mb_intra= 0;
2811 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2812                         s->mv[0][0][1] = best_s.mv[0][0][1];
2813                         s->mv[1][0][0] = best_s.mv[1][0][0];
2814                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2815                         backup_s.dquant= 0;
2816                         s->skipdct=1;
2817                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2818                                         &dmin, &next_block, mx, my);
2819                         s->skipdct=0;
2820                     }
2821                 }
2822
2823                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2824
2825                 copy_context_after_encode(s, &best_s, -1);
2826
2827                 pb_bits_count= put_bits_count(&s->pb);
2828                 flush_put_bits(&s->pb);
2829                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2830                 s->pb= backup_s.pb;
2831
2832                 if(s->data_partitioning){
2833                     pb2_bits_count= put_bits_count(&s->pb2);
2834                     flush_put_bits(&s->pb2);
2835                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2836                     s->pb2= backup_s.pb2;
2837
2838                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2839                     flush_put_bits(&s->tex_pb);
2840                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2841                     s->tex_pb= backup_s.tex_pb;
2842                 }
2843                 s->last_bits= put_bits_count(&s->pb);
2844
2845                 if (CONFIG_H263_ENCODER &&
2846                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2847                     ff_h263_update_motion_val(s);
2848
2849                 if(next_block==0){ //FIXME 16 vs linesize16
2850                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2851                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2852                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2853                 }
2854
2855                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2856                     ff_MPV_decode_mb(s, s->block);
2857             } else {
2858                 int motion_x = 0, motion_y = 0;
2859                 s->mv_type=MV_TYPE_16X16;
2860                 // only one MB-Type possible
2861
2862                 switch(mb_type){
2863                 case CANDIDATE_MB_TYPE_INTRA:
2864                     s->mv_dir = 0;
2865                     s->mb_intra= 1;
2866                     motion_x= s->mv[0][0][0] = 0;
2867                     motion_y= s->mv[0][0][1] = 0;
2868                     break;
2869                 case CANDIDATE_MB_TYPE_INTER:
2870                     s->mv_dir = MV_DIR_FORWARD;
2871                     s->mb_intra= 0;
2872                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2873                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2874                     break;
2875                 case CANDIDATE_MB_TYPE_INTER_I:
2876                     s->mv_dir = MV_DIR_FORWARD;
2877                     s->mv_type = MV_TYPE_FIELD;
2878                     s->mb_intra= 0;
2879                     for(i=0; i<2; i++){
2880                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2881                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2882                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2883                     }
2884                     break;
2885                 case CANDIDATE_MB_TYPE_INTER4V:
2886                     s->mv_dir = MV_DIR_FORWARD;
2887                     s->mv_type = MV_TYPE_8X8;
2888                     s->mb_intra= 0;
2889                     for(i=0; i<4; i++){
2890                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2891                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2892                     }
2893                     break;
2894                 case CANDIDATE_MB_TYPE_DIRECT:
2895                     if (CONFIG_MPEG4_ENCODER) {
2896                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2897                         s->mb_intra= 0;
2898                         motion_x=s->b_direct_mv_table[xy][0];
2899                         motion_y=s->b_direct_mv_table[xy][1];
2900                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2901                     }
2902                     break;
2903                 case CANDIDATE_MB_TYPE_DIRECT0:
2904                     if (CONFIG_MPEG4_ENCODER) {
2905                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2906                         s->mb_intra= 0;
2907                         ff_mpeg4_set_direct_mv(s, 0, 0);
2908                     }
2909                     break;
2910                 case CANDIDATE_MB_TYPE_BIDIR:
2911                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2912                     s->mb_intra= 0;
2913                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2914                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2915                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2916                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2917                     break;
2918                 case CANDIDATE_MB_TYPE_BACKWARD:
2919                     s->mv_dir = MV_DIR_BACKWARD;
2920                     s->mb_intra= 0;
2921                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2922                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2923                     break;
2924                 case CANDIDATE_MB_TYPE_FORWARD:
2925                     s->mv_dir = MV_DIR_FORWARD;
2926                     s->mb_intra= 0;
2927                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2928                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2929 //                    printf(" %d %d ", motion_x, motion_y);
2930                     break;
2931                 case CANDIDATE_MB_TYPE_FORWARD_I:
2932                     s->mv_dir = MV_DIR_FORWARD;
2933                     s->mv_type = MV_TYPE_FIELD;
2934                     s->mb_intra= 0;
2935                     for(i=0; i<2; i++){
2936                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2937                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2938                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2939                     }
2940                     break;
2941                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2942                     s->mv_dir = MV_DIR_BACKWARD;
2943                     s->mv_type = MV_TYPE_FIELD;
2944                     s->mb_intra= 0;
2945                     for(i=0; i<2; i++){
2946                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2947                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2948                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2949                     }
2950                     break;
2951                 case CANDIDATE_MB_TYPE_BIDIR_I:
2952                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2953                     s->mv_type = MV_TYPE_FIELD;
2954                     s->mb_intra= 0;
2955                     for(dir=0; dir<2; dir++){
2956                         for(i=0; i<2; i++){
2957                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2958                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2959                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2960                         }
2961                     }
2962                     break;
2963                 default:
2964                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2965                 }
2966
2967                 encode_mb(s, motion_x, motion_y);
2968
2969                 // RAL: Update last macroblock type
2970                 s->last_mv_dir = s->mv_dir;
2971
2972                 if (CONFIG_H263_ENCODER &&
2973                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2974                     ff_h263_update_motion_val(s);
2975
2976                 ff_MPV_decode_mb(s, s->block);
2977             }
2978
2979             /* clean the MV table in IPS frames for direct mode in B frames */
2980             if(s->mb_intra /* && I,P,S_TYPE */){
2981                 s->p_mv_table[xy][0]=0;
2982                 s->p_mv_table[xy][1]=0;
2983             }
2984
2985             if(s->flags&CODEC_FLAG_PSNR){
2986                 int w= 16;
2987                 int h= 16;
2988
2989                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2990                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2991
2992                 s->current_picture.f.error[0] += sse(
2993                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2994                     s->dest[0], w, h, s->linesize);
2995                 s->current_picture.f.error[1] += sse(
2996                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2997                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2998                 s->current_picture.f.error[2] += sse(
2999                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3000                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3001             }
3002             if(s->loop_filter){
3003                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3004                     ff_h263_loop_filter(s);
3005             }
3006 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
3007         }
3008     }
3009
3010     //not beautiful here but we must write it before flushing so it has to be here
3011     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3012         ff_msmpeg4_encode_ext_header(s);
3013
3014     write_slice_end(s);
3015
3016     /* Send the last GOB if RTP */
3017     if (s->avctx->rtp_callback) {
3018         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3019         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3020         /* Call the RTP callback to send the last GOB */
3021         emms_c();
3022         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3023     }
3024
3025     return 0;
3026 }
3027
3028 #define MERGE(field) dst->field += src->field; src->field=0
3029 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3030     MERGE(me.scene_change_score);
3031     MERGE(me.mc_mb_var_sum_temp);
3032     MERGE(me.mb_var_sum_temp);
3033 }
3034
3035 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3036     int i;
3037
3038     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3039     MERGE(dct_count[1]);
3040     MERGE(mv_bits);
3041     MERGE(i_tex_bits);
3042     MERGE(p_tex_bits);
3043     MERGE(i_count);
3044     MERGE(f_count);
3045     MERGE(b_count);
3046     MERGE(skip_count);
3047     MERGE(misc_bits);
3048     MERGE(error_count);
3049     MERGE(padding_bug_score);
3050     MERGE(current_picture.f.error[0]);
3051     MERGE(current_picture.f.error[1]);
3052     MERGE(current_picture.f.error[2]);
3053
3054     if(dst->avctx->noise_reduction){
3055         for(i=0; i<64; i++){
3056             MERGE(dct_error_sum[0][i]);
3057             MERGE(dct_error_sum[1][i]);
3058         }
3059     }
3060
3061     assert(put_bits_count(&src->pb) % 8 ==0);
3062     assert(put_bits_count(&dst->pb) % 8 ==0);
3063     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3064     flush_put_bits(&dst->pb);
3065 }
3066
3067 static int estimate_qp(MpegEncContext *s, int dry_run){
3068     if (s->next_lambda){
3069         s->current_picture_ptr->f.quality =
3070         s->current_picture.f.quality = s->next_lambda;
3071         if(!dry_run) s->next_lambda= 0;
3072     } else if (!s->fixed_qscale) {
3073         s->current_picture_ptr->f.quality =
3074         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3075         if (s->current_picture.f.quality < 0)
3076             return -1;
3077     }
3078
3079     if(s->adaptive_quant){
3080         switch(s->codec_id){
3081         case CODEC_ID_MPEG4:
3082             if (CONFIG_MPEG4_ENCODER)
3083                 ff_clean_mpeg4_qscales(s);
3084             break;
3085         case CODEC_ID_H263:
3086         case CODEC_ID_H263P:
3087         case CODEC_ID_FLV1:
3088             if (CONFIG_H263_ENCODER)
3089                 ff_clean_h263_qscales(s);
3090             break;
3091         default:
3092             ff_init_qscale_tab(s);
3093         }
3094
3095         s->lambda= s->lambda_table[0];
3096         //FIXME broken
3097     }else
3098         s->lambda = s->current_picture.f.quality;
3099 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3100     update_qscale(s);
3101     return 0;
3102 }
3103
3104 /* must be called before writing the header */
3105 static void set_frame_distances(MpegEncContext * s){
3106     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3107     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3108
3109     if(s->pict_type==AV_PICTURE_TYPE_B){
3110         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3111         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3112     }else{
3113         s->pp_time= s->time - s->last_non_b_time;
3114         s->last_non_b_time= s->time;
3115         assert(s->picture_number==0 || s->pp_time > 0);
3116     }
3117 }
3118
/**
 * Encode one picture.
 *
 * In order: set up per-picture state (timing, rounding, provisional lambda),
 * run motion estimation (or, for I pictures, the variance analysis used by
 * rate control), pick f_code/b_code, settle the final quantiser with
 * estimate_qp(), write the format specific picture header and finally run
 * encode_thread on all slice contexts and merge their results into s.
 *
 * @param s              main encoder context; s->thread_context[1..] are
 *                       updated from it and merged back into it
 * @param picture_number stored in s->picture_number and handed to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails
 *
 * NOTE(review): the return values of the avctx->execute() calls are not
 * checked in this function.
 */
3119 static int encode_picture(MpegEncContext *s, int picture_number)
3120 {
3121     int i;
3122     int bits;
3123     int context_count = s->slice_context_count;
3124
3125     s->picture_number = picture_number;
3126
3127     /* Reset the average MB variance */
3128     s->me.mb_var_sum_temp    =
3129     s->me.mc_mb_var_sum_temp = 0;
3130
3131     /* we need to initialize some time vars before we can encode b-frames */
3132     // RAL: Condition added for MPEG1VIDEO
3133     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3134         set_frame_distances(s);
3135     if(CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4)
3136         ff_set_mpeg4_time(s);
3137
3138     s->me.scene_change_score=0;
3139
3140 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3141
         /* Rounding control: I pictures set no_rounding from the msmpeg4
          * version; other non-B pictures toggle it when flipflop_rounding is
          * set or the codec is H263P/MPEG4. B pictures leave it unchanged. */
3142     if(s->pict_type==AV_PICTURE_TYPE_I){
3143         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3144         else                        s->no_rounding=0;
3145     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3146         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
3147             s->no_rounding ^= 1;
3148     }
3149
         /* Provisional lambda/qscale for motion estimation: a dry-run rate
          * estimate in pass 2, otherwise (unless CODEC_FLAG_QSCALE is set)
          * the last lambda recorded for the matching picture type. */
3150     if(s->flags & CODEC_FLAG_PASS2){
3151         if (estimate_qp(s,1) < 0)
3152             return -1;
3153         ff_get_2pass_fcode(s);
3154     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3155         if(s->pict_type==AV_PICTURE_TYPE_B)
3156             s->lambda= s->last_lambda_for[s->pict_type];
3157         else
3158             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3159         update_qscale(s);
3160     }
3161
         /* For every codec except AMV the chroma intra matrices alias the
          * luma ones; a separately allocated chroma matrix is freed first. */
3162     if(s->codec_id != CODEC_ID_AMV){
3163         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3164         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3165         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3166         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3167     }
3168
3169     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* presumably copies the state prepared above into the other slice
          * contexts -- confirm against ff_update_duplicate_context() */
3170     for(i=1; i<context_count; i++){
3171         ff_update_duplicate_context(s->thread_context[i], s);
3172     }
3173
3174     if(ff_init_me(s)<0)
3175         return -1;
3176
3177     /* Estimate motion for every MB */
3178     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3179         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3180         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
             /* optional pre-pass: only for non-B pictures with me_threshold==0,
              * and only after an I picture unless pre_me==2 */
3181         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3182             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3183                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3184             }
3185         }
3186
3187         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3188     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3189         /* I-Frame */
3190         for(i=0; i<s->mb_stride*s->mb_height; i++)
3191             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3192
3193         if(!s->fixed_qscale){
3194             /* finding spatial complexity for I-frame rate control */
3195             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3196         }
3197     }
         /* collect the motion-estimation sums of the other contexts in s */
3198     for(i=1; i<context_count; i++){
3199         merge_context_after_me(s, s->thread_context[i]);
3200     }
3201     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3202     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3203     emms_c();
3204
         /* scene change: a P picture whose score exceeds the threshold is
          * re-typed as I and every macroblock becomes an intra candidate */
3205     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3206         s->pict_type= AV_PICTURE_TYPE_I;
3207         for(i=0; i<s->mb_stride*s->mb_height; i++)
3208             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3209 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3210     }
3211
         /* Choose f_code/b_code from the motion vector tables and let
          * ff_fix_long_mvs() process each table with the chosen code
          * (presumably handling vectors that do not fit -- confirm).
          * Skipped entirely when umvplus is set. */
3212     if(!s->umvplus){
3213         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3214             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3215
3216             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3217                 int a,b;
3218                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3219                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3220                 s->f_code= FFMAX3(s->f_code, a, b);
3221             }
3222
3223             ff_fix_long_p_mvs(s);
3224             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3225             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3226                 int j;
3227                 for(i=0; i<2; i++){
3228                     for(j=0; j<2; j++)
3229                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3230                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3231                 }
3232             }
3233         }
3234
3235         if(s->pict_type==AV_PICTURE_TYPE_B){
3236             int a, b;
3237
                 /* f_code covers the forward and bidir-forward tables,
                  * b_code the backward and bidir-backward tables */
3238             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3239             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3240             s->f_code = FFMAX(a, b);
3241
3242             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3243             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3244             s->b_code = FFMAX(a, b);
3245
3246             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3247             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3248             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3249             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3250             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3251                 int dir, j;
3252                 for(dir=0; dir<2; dir++){
3253                     for(i=0; i<2; i++){
3254                         for(j=0; j<2; j++){
3255                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3256                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3257                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3258                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3259                         }
3260                     }
3261                 }
3262             }
3263         }
3264     }
3265
         /* final (non dry-run) quantiser decision for this picture */
3266     if (estimate_qp(s, 0) < 0)
3267         return -1;
3268
3269     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3270         s->qscale= 3; //reduce clipping problems
3271
3272     if (s->out_format == FMT_MJPEG) {
3273         /* for mjpeg, we do include qscale in the matrix */
3274         for(i=1;i<64;i++){
3275             int j= s->dsp.idct_permutation[i];
3276
3277             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3278         }
3279         s->y_dc_scale_table=
3280         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3281         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3282         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3283                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrix, so it is pinned to 8 */
3284         s->qscale= 8;
3285     }
         /* AMV: luma/chroma matrices come from rows 5*2+0 and 5*2+1 of
          * sp5x_quant_table, the DC scale is constant (13 luma, 14 chroma)
          * and qscale is pinned to 8 */
3286     if(s->codec_id == CODEC_ID_AMV){
3287         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3288         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3289         for(i=1;i<64;i++){
3290             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3291
3292             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3293             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3294         }
3295         s->y_dc_scale_table= y;
3296         s->c_dc_scale_table= c;
3297         s->intra_matrix[0] = 13;
3298         s->chroma_intra_matrix[0] = 14;
3299         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3300                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3301         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3302                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3303         s->qscale= 8;
3304     }
3305
3306     //FIXME var duplication
3307     s->current_picture_ptr->f.key_frame =
3308     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3309     s->current_picture_ptr->f.pict_type =
3310     s->current_picture.f.pict_type = s->pict_type;
3311
3312     if (s->current_picture.f.key_frame)
3313         s->picture_in_gop_number=0;
3314
         /* write the picture header; its size is derived from the bit
          * position before (last_bits) and after (bits) */
3315     s->last_bits= put_bits_count(&s->pb);
3316     switch(s->out_format) {
3317     case FMT_MJPEG:
3318         if (CONFIG_MJPEG_ENCODER)
3319             ff_mjpeg_encode_picture_header(s);
3320         break;
3321     case FMT_H261:
3322         if (CONFIG_H261_ENCODER)
3323             ff_h261_encode_picture_header(s, picture_number);
3324         break;
3325     case FMT_H263:
3326         if (CONFIG_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
3327             ff_wmv2_encode_picture_header(s, picture_number);
3328         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3329             ff_msmpeg4_encode_picture_header(s, picture_number);
3330         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3331             ff_mpeg4_encode_picture_header(s, picture_number);
3332         else if (CONFIG_RV10_ENCODER && s->codec_id == CODEC_ID_RV10)
3333             ff_rv10_encode_picture_header(s, picture_number);
3334         else if (CONFIG_RV20_ENCODER && s->codec_id == CODEC_ID_RV20)
3335             ff_rv20_encode_picture_header(s, picture_number);
3336         else if (CONFIG_FLV_ENCODER && s->codec_id == CODEC_ID_FLV1)
3337             ff_flv_encode_picture_header(s, picture_number);
3338         else if (CONFIG_H263_ENCODER)
3339             ff_h263_encode_picture_header(s, picture_number);
3340         break;
3341     case FMT_MPEG1:
3342         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3343             ff_mpeg1_encode_picture_header(s, picture_number);
3344         break;
3345     case FMT_H264:
3346         break;
3347     default:
3348         assert(0);
3349     }
3350     bits= put_bits_count(&s->pb);
3351     s->header_bits= bits - s->last_bits;
3352
         /* refresh the other contexts (presumably with the decisions taken
          * since motion estimation -- confirm against the helper), encode all
          * slices, then merge statistics and bitstreams back into s */
3353     for(i=1; i<context_count; i++){
3354         update_duplicate_context_after_me(s->thread_context[i], s);
3355     }
3356     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3357     for(i=1; i<context_count; i++){
3358         merge_context_after_encode(s, s->thread_context[i]);
3359     }
3360     emms_c();
3361     return 0;
3362 }
3363
3364 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3365     const int intra= s->mb_intra;
3366     int i;
3367
3368     s->dct_count[intra]++;
3369
3370     for(i=0; i<64; i++){
3371         int level= block[i];
3372
3373         if(level){
3374             if(level>0){
3375                 s->dct_error_sum[intra][i] += level;
3376                 level -= s->dct_offset[intra][i];
3377                 if(level<0) level=0;
3378             }else{
3379                 s->dct_error_sum[intra][i] -= level;
3380                 level += s->dct_offset[intra][i];
3381                 if(level>0) level=0;
3382             }
3383             block[i]= level;
3384         }
3385     }
3386 }
3387
/**
 * Transform and quantise one 8x8 block with a rate-distortion search.
 *
 * After the forward DCT (and optional denoising) each coefficient gets up to
 * two candidate levels. A dynamic-programming pass over the scan positions
 * then picks, per position, the run/level pair with the lowest
 * distortion + lambda * bits score, keeping a pruned list of "survivor"
 * start positions. The chosen path is finally written back into block.
 *
 * @param s        encoder context
 * @param block    in: samples to transform; out: quantised levels, stored at
 *                 the positions given by s->intra_scantable.permutated
 * @param n        block index; n < 4 selects the luma DC scale and intra
 *                 matrix, otherwise the chroma ones
 * @param qscale   quantiser scale
 * @param overflow set to non-zero when a candidate level may exceed
 *                 s->max_qcoeff
 * @return scan index of the last coefficient kept; a value below the first
 *         coded position (0 for intra blocks, -1 for inter blocks) means no
 *         coefficient was kept
 */
3388 static int dct_quantize_trellis_c(MpegEncContext *s,
3389                                   DCTELEM *block, int n,
3390                                   int qscale, int *overflow){
3391     const int *qmat;
3392     const uint8_t *scantable= s->intra_scantable.scantable;
3393     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3394     int max=0;
3395     unsigned int threshold1, threshold2;
3396     int bias=0;
         /* DP tables are indexed by scan position + 1 */
3397     int run_tab[65];
3398     int level_tab[65];
3399     int score_tab[65];
3400     int survivor[65];
3401     int survivor_count;
3402     int last_run=0;
3403     int last_level=0;
3404     int last_score= 0;
3405     int last_i;
         /* coeff[k][i]: k-th candidate level at scan position i,
          * coeff_count[i]: number of candidates (1 or 2) */
3406     int coeff[2][64];
3407     int coeff_count[64];
3408     int qmul, qadd, start_i, last_non_zero, i, dc;
3409     const int esc_length= s->ac_esc_length;
3410     uint8_t * length;
3411     uint8_t * last_length;
         /* weight applied to VLC bit lengths in the scores below */
3412     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3413
3414     s->dsp.fdct (block);
3415
         /* denoising is applied only when the error-sum table is allocated */
3416     if(s->dct_error_sum)
3417         s->denoise_dct(s, block);
         /* dequantisation constants for the FMT_H263 distortion estimate */
3418     qmul= qscale*16;
3419     qadd= ((qscale-1)|1)*8;
3420
3421     if (s->mb_intra) {
3422         int q;
3423         if (!s->h263_aic) {
3424             if (n < 4)
3425                 q = s->y_dc_scale;
3426             else
3427                 q = s->c_dc_scale;
3428             q = q << 3;
3429         } else{
3430             /* For AIC we skip quant/dequant of INTRADC */
3431             q = 1 << 3;
3432             qadd=0;
3433         }
3434
3435         /* note: block[0] is assumed to be positive */
             /* DC is quantised directly (rounded division); the search below
              * only covers positions 1..63 */
3436         block[0] = (block[0] + (q >> 1)) / q;
3437         start_i = 1;
3438         last_non_zero = 0;
3439         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3440         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3441             bias= 1<<(QMAT_SHIFT-1);
3442         length     = s->intra_ac_vlc_length;
3443         last_length= s->intra_ac_vlc_last_length;
3444     } else {
3445         start_i = 0;
3446         last_non_zero = -1;
3447         qmat = s->q_inter_matrix[qscale];
3448         length     = s->inter_ac_vlc_length;
3449         last_length= s->inter_ac_vlc_last_length;
3450     }
3451     last_i= start_i;
3452
         /* (unsigned)(level+threshold1) > threshold2 is true exactly when
          * level lies outside [-threshold1, threshold1] */
3453     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3454     threshold2= (threshold1<<1);
3455
         /* scan backwards for the last coefficient above the threshold */
3456     for(i=63; i>=start_i; i--) {
3457         const int j = scantable[i];
3458         int level = block[j] * qmat[j];
3459
3460         if(((unsigned)(level+threshold1))>threshold2){
3461             last_non_zero = i;
3462             break;
3463         }
3464     }
3465
         /* build the candidate levels: the rounded level and the one next to
          * it towards zero (only if that is non-zero); coefficients below the
          * threshold get the single candidate +1 or -1 matching their sign */
3466     for(i=start_i; i<=last_non_zero; i++) {
3467         const int j = scantable[i];
3468         int level = block[j] * qmat[j];
3469
3470 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3471 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3472         if(((unsigned)(level+threshold1))>threshold2){
3473             if(level>0){
3474                 level= (bias + level)>>QMAT_SHIFT;
3475                 coeff[0][i]= level;
3476                 coeff[1][i]= level-1;
3477 //                coeff[2][k]= level-2;
3478             }else{
3479                 level= (bias - level)>>QMAT_SHIFT;
3480                 coeff[0][i]= -level;
3481                 coeff[1][i]= -level+1;
3482 //                coeff[2][k]= -level+2;
3483             }
3484             coeff_count[i]= FFMIN(level, 2);
3485             assert(coeff_count[i]);
3486             max |=level;
3487         }else{
3488             coeff[0][i]= (level>>31)|1;
3489             coeff_count[i]= 1;
3490         }
3491     }
3492
3493     *overflow= s->max_qcoeff < max; //overflow might have happened
3494
         /* nothing above the threshold: clear the searched coefficients */
3495     if(last_non_zero < start_i){
3496         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3497         return last_non_zero;
3498     }
3499
         /* the only initial survivor is the start position, with score 0 */
3500     score_tab[start_i]= 0;
3501     survivor[0]= start_i;
3502     survivor_count= 1;
3503
         /* forward pass: score_tab[i+1] becomes the best score of any path
          * that codes a non-zero level at position i */
3504     for(i=start_i; i<=last_non_zero; i++){
3505         int level_index, j, zero_distortion;
3506         int dct_coeff= FFABS(block[ scantable[i] ]);
3507         int best_score=256*256*256*120;
3508
             /* rescale the coefficient when the ifast DCT is in use */
3509         if (s->dsp.fdct == ff_fdct_ifast)
3510             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3511         zero_distortion= dct_coeff*dct_coeff;
3512
3513         for(level_index=0; level_index < coeff_count[i]; level_index++){
3514             int distortion;
3515             int level= coeff[level_index][i];
3516             const int alevel= FFABS(level);
3517             int unquant_coeff;
3518
3519             assert(level);
3520
                 /* reconstruct the value a decoder would see for this level */
3521             if(s->out_format == FMT_H263){
3522                 unquant_coeff= alevel*qmul + qadd;
3523             }else{ //MPEG1
3524                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3525                 if(s->mb_intra){
3526                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3527                         unquant_coeff =   (unquant_coeff - 1) | 1;
3528                 }else{
3529                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3530                         unquant_coeff =   (unquant_coeff - 1) | 1;
3531                 }
3532                 unquant_coeff<<= 3;
3533             }
3534
                 /* distortion relative to coding this coefficient as zero */
3535             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* levels in [-64, 63] are looked up in the VLC length
                  * tables, everything else is charged the escape length */
3536             level+=64;
3537             if((level&(~127)) == 0){
3538                 for(j=survivor_count-1; j>=0; j--){
3539                     int run= i - survivor[j];
3540                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3541                     score += score_tab[i-run];
3542
3543                     if(score < best_score){
3544                         best_score= score;
3545                         run_tab[i+1]= run;
3546                         level_tab[i+1]= level-64;
3547                     }
3548                 }
3549
                     /* FMT_H263 only: also score this coefficient as the
                      * final one, using the separate last_length table */
3550                 if(s->out_format == FMT_H263){
3551                     for(j=survivor_count-1; j>=0; j--){
3552                         int run= i - survivor[j];
3553                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3554                         score += score_tab[i-run];
3555                         if(score < last_score){
3556                             last_score= score;
3557                             last_run= run;
3558                             last_level= level-64;
3559                             last_i= i+1;
3560                         }
3561                     }
3562                 }
3563             }else{
3564                 distortion += esc_length*lambda;
3565                 for(j=survivor_count-1; j>=0; j--){
3566                     int run= i - survivor[j];
3567                     int score= distortion + score_tab[i-run];
3568
3569                     if(score < best_score){
3570                         best_score= score;
3571                         run_tab[i+1]= run;
3572                         level_tab[i+1]= level-64;
3573                     }
3574                 }
3575
3576                 if(s->out_format == FMT_H263){
3577                   for(j=survivor_count-1; j>=0; j--){
3578                         int run= i - survivor[j];
3579                         int score= distortion + score_tab[i-run];
3580                         if(score < last_score){
3581                             last_score= score;
3582                             last_run= run;
3583                             last_level= level-64;
3584                             last_i= i+1;
3585                         }
3586                     }
3587                 }
3588             }
3589         }
3590
3591         score_tab[i+1]= best_score;
3592
             /* prune survivors that score worse than the new position; for
              * long blocks (last_non_zero > 27) a margin of lambda is allowed */
3593         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3594         if(last_non_zero <= 27){
3595             for(; survivor_count; survivor_count--){
3596                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3597                     break;
3598             }
3599         }else{
3600             for(; survivor_count; survivor_count--){
3601                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3602                     break;
3603             }
3604         }
3605
3606         survivor[ survivor_count++ ]= i+1;
3607     }
3608
         /* formats other than FMT_H263: pick the end position afterwards;
          * every end position except 0 is charged lambda*2 (presumably an
          * estimate of the end-of-block cost -- see the FIXME) */
3609     if(s->out_format != FMT_H263){
3610         last_score= 256*256*256*120;
3611         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3612             int score= score_tab[i];
3613             if(i) score += lambda*2; //FIXME exacter?
3614
3615             if(score < last_score){
3616                 last_score= score;
3617                 last_i= i;
3618                 last_level= level_tab[i];
3619                 last_run= run_tab[i];
3620             }
3621         }
3622     }
3623
3624     s->coded_score[n] = last_score;
3625
         /* remember |block[0]| for the single-coefficient case below, then
          * clear everything the search covered before writing the result */
3626     dc= FFABS(block[0]);
3627     last_non_zero= last_i - 1;
3628     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3629
3630     if(last_non_zero < start_i)
3631         return last_non_zero;
3632
         /* inter block whose only coded coefficient is at position 0:
          * re-evaluate its candidates against coding nothing at all */
3633     if(last_non_zero == 0 && start_i == 0){
3634         int best_level= 0;
3635         int best_score= dc * dc;
3636
3637         for(i=0; i<coeff_count[0]; i++){
3638             int level= coeff[i][0];
3639             int alevel= FFABS(level);
3640             int unquant_coeff, score, distortion;
3641
3642             if(s->out_format == FMT_H263){
3643                     unquant_coeff= (alevel*qmul + qadd)>>3;
3644             }else{ //MPEG1
3645                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3646                     unquant_coeff =   (unquant_coeff - 1) | 1;
3647             }
3648             unquant_coeff = (unquant_coeff + 4) >> 3;
3649             unquant_coeff<<= 3 + 3;
3650
3651             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3652             level+=64;
3653             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3654             else                    score= distortion + esc_length*lambda;
3655
3656             if(score < best_score){
3657                 best_score= score;
3658                 best_level= level - 64;
3659             }
3660         }
3661         block[0]= best_level;
3662         s->coded_score[n] = best_score - dc*dc;
3663         if(best_level == 0) return -1;
3664         else                return last_non_zero;
3665     }
3666
         /* backtrack: store the last level, then follow run_tab/level_tab
          * towards the start position */
3667     i= last_i;
3668     assert(last_level);
3669
3670     block[ perm_scantable[last_non_zero] ]= last_level;
3671     i -= last_run + 1;
3672
3673     for(; i>start_i; i -= run_tab[i] + 1){
3674         block[ perm_scantable[i-1] ]= level_tab[i];
3675     }
3676
3677     return last_non_zero;
3678 }
3679
3680 //#define REFINE_STATS 1
3681 static int16_t basis[64][64];
3682
3683 static void build_basis(uint8_t *perm){
3684     int i, j, x, y;
3685     emms_c();
3686     for(i=0; i<8; i++){
3687         for(j=0; j<8; j++){
3688             for(y=0; y<8; y++){
3689                 for(x=0; x<8; x++){
3690                     double s= 0.25*(1<<BASIS_SHIFT);
3691                     int index= 8*i + j;
3692                     int perm_index= perm[index];
3693                     if(i==0) s*= sqrt(0.5);
3694                     if(j==0) s*= sqrt(0.5);
3695                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3696                 }
3697             }
3698         }
3699     }
3700 }
3701
3702 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3703                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3704                         int n, int qscale){
3705     int16_t rem[64];
3706     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3707     const uint8_t *scantable= s->intra_scantable.scantable;
3708     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3709 //    unsigned int threshold1, threshold2;
3710 //    int bias=0;
3711     int run_tab[65];
3712     int prev_run=0;
3713     int prev_level=0;
3714     int qmul, qadd, start_i, last_non_zero, i, dc;
3715     uint8_t * length;
3716     uint8_t * last_length;
3717     int lambda;
3718     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3719 #ifdef REFINE_STATS
3720 static int count=0;
3721 static int after_last=0;
3722 static int to_zero=0;
3723 static int from_zero=0;
3724 static int raise=0;
3725 static int lower=0;
3726 static int messed_sign=0;
3727 #endif
3728
3729     if(basis[0][0] == 0)
3730         build_basis(s->dsp.idct_permutation);
3731
3732     qmul= qscale*2;
3733     qadd= (qscale-1)|1;
3734     if (s->mb_intra) {
3735         if (!s->h263_aic) {
3736             if (n < 4)
3737                 q = s->y_dc_scale;
3738             else
3739                 q = s->c_dc_scale;
3740         } else{
3741             /* For AIC we skip quant/dequant of INTRADC */
3742             q = 1;
3743             qadd=0;
3744         }
3745         q <<= RECON_SHIFT-3;
3746         /* note: block[0] is assumed to be positive */
3747         dc= block[0]*q;
3748 //        block[0] = (block[0] + (q >> 1)) / q;
3749         start_i = 1;
3750 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3751 //            bias= 1<<(QMAT_SHIFT-1);
3752         length     = s->intra_ac_vlc_length;
3753         last_length= s->intra_ac_vlc_last_length;
3754     } else {
3755         dc= 0;
3756         start_i = 0;
3757         length     = s->inter_ac_vlc_length;
3758         last_length= s->inter_ac_vlc_last_length;
3759     }
3760     last_non_zero = s->block_last_index[n];
3761
3762 #ifdef REFINE_STATS
3763 {START_TIMER
3764 #endif
3765     dc += (1<<(RECON_SHIFT-1));
3766     for(i=0; i<64; i++){
3767         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3768     }
3769 #ifdef REFINE_STATS
3770 STOP_TIMER("memset rem[]")}
3771 #endif
3772     sum=0;
3773     for(i=0; i<64; i++){
3774         int one= 36;
3775         int qns=4;
3776         int w;
3777
3778         w= FFABS(weight[i]) + qns*one;
3779         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3780
3781         weight[i] = w;
3782 //        w=weight[i] = (63*qns + (w/2)) / w;
3783
3784         assert(w>0);
3785         assert(w<(1<<6));
3786         sum += w*w;
3787     }
3788     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3789 #ifdef REFINE_STATS
3790 {START_TIMER
3791 #endif
3792     run=0;
3793     rle_index=0;
3794     for(i=start_i; i<=last_non_zero; i++){
3795         int j= perm_scantable[i];
3796         const int level= block[j];
3797         int coeff;
3798
3799         if(level){
3800             if(level<0) coeff= qmul*level - qadd;
3801             else        coeff= qmul*level + qadd;
3802             run_tab[rle_index++]=run;
3803             run=0;
3804
3805             s->dsp.add_8x8basis(rem, basis[j], coeff);
3806         }else{
3807             run++;
3808         }
3809     }
3810 #ifdef REFINE_STATS
3811 if(last_non_zero>0){
3812 STOP_TIMER("init rem[]")
3813 }
3814 }
3815
3816 {START_TIMER
3817 #endif
3818     for(;;){
3819         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3820         int best_coeff=0;
3821         int best_change=0;
3822         int run2, best_unquant_change=0, analyze_gradient;
3823 #ifdef REFINE_STATS
3824 {START_TIMER
3825 #endif
3826         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3827
3828         if(analyze_gradient){
3829 #ifdef REFINE_STATS
3830 {START_TIMER
3831 #endif
3832             for(i=0; i<64; i++){
3833                 int w= weight[i];
3834
3835                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3836             }
3837 #ifdef REFINE_STATS
3838 STOP_TIMER("rem*w*w")}
3839 {START_TIMER
3840 #endif
3841             s->dsp.fdct(d1);
3842 #ifdef REFINE_STATS
3843 STOP_TIMER("dct")}
3844 #endif
3845         }
3846
3847         if(start_i){
3848             const int level= block[0];
3849             int change, old_coeff;
3850
3851             assert(s->mb_intra);
3852
3853             old_coeff= q*level;
3854
3855             for(change=-1; change<=1; change+=2){
3856                 int new_level= level + change;
3857                 int score, new_coeff;
3858
3859                 new_coeff= q*new_level;
3860                 if(new_coeff >= 2048 || new_coeff < 0)
3861                     continue;
3862
3863                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3864                 if(score<best_score){
3865                     best_score= score;
3866                     best_coeff= 0;
3867                     best_change= change;
3868                     best_unquant_change= new_coeff - old_coeff;
3869                 }
3870             }
3871         }
3872
3873         run=0;
3874         rle_index=0;
3875         run2= run_tab[rle_index++];
3876         prev_level=0;
3877         prev_run=0;
3878
3879         for(i=start_i; i<64; i++){
3880             int j= perm_scantable[i];
3881             const int level= block[j];
3882             int change, old_coeff;
3883
3884             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3885                 break;
3886
3887             if(level){
3888                 if(level<0) old_coeff= qmul*level - qadd;
3889                 else        old_coeff= qmul*level + qadd;
3890                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3891             }else{
3892                 old_coeff=0;
3893                 run2--;
3894                 assert(run2>=0 || i >= last_non_zero );
3895             }
3896
3897             for(change=-1; change<=1; change+=2){
3898                 int new_level= level + change;
3899                 int score, new_coeff, unquant_change;
3900
3901                 score=0;
3902                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3903                    continue;
3904
3905                 if(new_level){
3906                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3907                     else            new_coeff= qmul*new_level + qadd;
3908                     if(new_coeff >= 2048 || new_coeff <= -2048)
3909                         continue;
3910                     //FIXME check for overflow
3911
3912                     if(level){
3913                         if(level < 63 && level > -63){
3914                             if(i < last_non_zero)
3915                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3916                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3917                             else
3918                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3919                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3920                         }
3921                     }else{
3922                         assert(FFABS(new_level)==1);
3923
3924                         if(analyze_gradient){
3925                             int g= d1[ scantable[i] ];
3926                             if(g && (g^new_level) >= 0)
3927                                 continue;
3928                         }
3929
3930                         if(i < last_non_zero){
3931                             int next_i= i + run2 + 1;
3932                             int next_level= block[ perm_scantable[next_i] ] + 64;
3933
3934                             if(next_level&(~127))
3935                                 next_level= 0;
3936
3937                             if(next_i < last_non_zero)
3938                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3939                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3940                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3941                             else
3942                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3943                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3944                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3945                         }else{
3946                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3947                             if(prev_level){
3948                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3949                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3950                             }
3951                         }
3952                     }
3953                 }else{
3954                     new_coeff=0;
3955                     assert(FFABS(level)==1);
3956
3957                     if(i < last_non_zero){
3958                         int next_i= i + run2 + 1;
3959                         int next_level= block[ perm_scantable[next_i] ] + 64;
3960
3961                         if(next_level&(~127))
3962                             next_level= 0;
3963
3964                         if(next_i < last_non_zero)
3965                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3966                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3967                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3968                         else
3969                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3970                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3971                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3972                     }else{
3973                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3974                         if(prev_level){
3975                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3976                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3977                         }
3978                     }
3979                 }
3980
3981                 score *= lambda;
3982
3983                 unquant_change= new_coeff - old_coeff;
3984                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3985
3986                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3987                 if(score<best_score){
3988                     best_score= score;
3989                     best_coeff= i;
3990                     best_change= change;
3991                     best_unquant_change= unquant_change;
3992                 }
3993             }
3994             if(level){
3995                 prev_level= level + 64;
3996                 if(prev_level&(~127))
3997                     prev_level= 0;
3998                 prev_run= run;
3999                 run=0;
4000             }else{
4001                 run++;
4002             }
4003         }
4004 #ifdef REFINE_STATS
4005 STOP_TIMER("iterative step")}
4006 #endif
4007
4008         if(best_change){
4009             int j= perm_scantable[ best_coeff ];
4010
4011             block[j] += best_change;
4012
4013             if(best_coeff > last_non_zero){
4014                 last_non_zero= best_coeff;
4015                 assert(block[j]);
4016 #ifdef REFINE_STATS
4017 after_last++;
4018 #endif
4019             }else{
4020 #ifdef REFINE_STATS
4021 if(block[j]){
4022     if(block[j] - best_change){
4023         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4024             raise++;
4025         }else{
4026             lower++;
4027         }
4028     }else{
4029         from_zero++;
4030     }
4031 }else{
4032     to_zero++;
4033 }
4034 #endif
4035                 for(; last_non_zero>=start_i; last_non_zero--){
4036                     if(block[perm_scantable[last_non_zero]])
4037                         break;
4038                 }
4039             }
4040 #ifdef REFINE_STATS
4041 count++;
4042 if(256*256*256*64 % count == 0){
4043     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4044 }
4045 #endif
4046             run=0;
4047             rle_index=0;
4048             for(i=start_i; i<=last_non_zero; i++){
4049                 int j= perm_scantable[i];
4050                 const int level= block[j];
4051
4052                  if(level){
4053                      run_tab[rle_index++]=run;
4054                      run=0;
4055                  }else{
4056                      run++;
4057                  }
4058             }
4059
4060             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4061         }else{
4062             break;
4063         }
4064     }
4065 #ifdef REFINE_STATS
4066 if(last_non_zero>0){
4067 STOP_TIMER("iterative search")
4068 }
4069 }
4070 #endif
4071
4072     return last_non_zero;
4073 }
4074
/**
 * Plain C transform + quantization of one block of 64 coefficients.
 *
 * Runs s->dsp.fdct() on the block in place, optionally s->denoise_dct(),
 * then quantizes the coefficients in scan order with the matrix selected by
 * qscale. For intra blocks the first coefficient (block[0]) is handled
 * separately with a rounded division by the DC scale.
 *
 * @param s        encoder context; supplies the DSP callbacks, quantization
 *                 matrices, biases and scan table
 * @param block    64 coefficients, transformed and quantized in place
 * @param n        block index; n < 4 selects y_dc_scale / q_intra_matrix,
 *                 otherwise c_dc_scale / q_chroma_intra_matrix
 *                 (presumably luma vs. chroma blocks -- confirm with callers)
 * @param qscale   index into the per-qscale quantization matrices
 * @param overflow written with nonzero when a quantized magnitude may exceed
 *                 s->max_qcoeff (conservative, see the end of the function)
 * @return scan-order index of the last nonzero coefficient; 0 for an intra
 *         block without any surviving AC coefficient, -1 for an inter block
 *         that quantizes to all zeros
 */
4075 int ff_dct_quantize_c(MpegEncContext *s,
4076                         DCTELEM *block, int n,
4077                         int qscale, int *overflow)
4078 {
4079     int i, j, level, last_non_zero, q, start_i;
4080     const int *qmat;
         /* the intra scan table is used for intra and inter blocks alike */
4081     const uint8_t *scantable= s->intra_scantable.scantable;
4082     int bias;
         /* bitwise OR of all quantized magnitudes, used for the overflow flag */
4083     int max=0;
4084     unsigned int threshold1, threshold2;
4085
4086     s->dsp.fdct (block);
4087
         /* denoising only runs when dct_error_sum is set */
4088     if(s->dct_error_sum)
4089         s->denoise_dct(s, block);
4090
4091     if (s->mb_intra) {
             /* q becomes the divisor for block[0]: DC scale times 8, or 8 with AIC */
4092         if (!s->h263_aic) {
4093             if (n < 4)
4094                 q = s->y_dc_scale;
4095             else
4096                 q = s->c_dc_scale;
4097             q = q << 3;
4098         } else
4099             /* For AIC we skip quant/dequant of INTRADC */
4100             q = 1 << 3;
4101
4102         /* note: block[0] is assumed to be positive */
             /* adding q/2 before dividing rounds to nearest for non-negative input */
4103         block[0] = (block[0] + (q >> 1)) / q;
             /* the loops below start at scan index 1, leaving block[0] alone */
4104         start_i = 1;
4105         last_non_zero = 0;
4106         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4107         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4108     } else {
4109         start_i = 0;
             /* -1 is what gets returned if every coefficient quantizes to zero */
4110         last_non_zero = -1;
4111         qmat = s->q_inter_matrix[qscale];
4112         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4113     }
         /*
          * threshold1 is the largest scaled magnitude that still quantizes to 0:
          * (bias + threshold1) >> QMAT_SHIFT == 0. With threshold2 == 2*threshold1
          * the single unsigned compare "(unsigned)(level+threshold1) > threshold2"
          * is true exactly when level lies outside [-threshold1, threshold1].
          */
4114     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4115     threshold2= (threshold1<<1);
         /* Pass 1: walk backwards in scan order, zeroing coefficients that would
          * quantize to 0, and stop at the first one that survives. */
4116     for(i=63;i>=start_i;i--) {
4117         j = scantable[i];
4118         level = block[j] * qmat[j];
4119
4120         if(((unsigned)(level+threshold1))>threshold2){
4121             last_non_zero = i;
4122             break;
4123         }else{
4124             block[j]=0;
4125         }
4126     }
         /* Pass 2: quantize everything from start_i up to last_non_zero. */
4127     for(i=start_i; i<=last_non_zero; i++) {
4128         j = scantable[i];
4129         level = block[j] * qmat[j];
4130
4131 //        if(   bias+level >= (1<<QMAT_SHIFT)
4132 //           || bias-level >= (1<<QMAT_SHIFT)){
4133         if(((unsigned)(level+threshold1))>threshold2){
                 /* in both branches level ends up holding the quantized magnitude;
                  * the sign is restored only in block[j] */
4134             if(level>0){
4135                 level= (bias + level)>>QMAT_SHIFT;
4136                 block[j]= level;
4137             }else{
4138                 level= (bias - level)>>QMAT_SHIFT;
4139                 block[j]= -level;
4140             }
4141             max |=level;
4142         }else{
4143             block[j]=0;
4144         }
4145     }
         /* max is an OR of magnitudes, hence >= the largest one; the flag can be
          * set even though no single level actually exceeds max_qcoeff */
4146     *overflow= s->max_qcoeff < max; //overflow might have happened
4147
4148     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4149     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4150         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4151
4152     return last_non_zero;
4153 }
4154
/* Byte offset of field x inside MpegEncContext; used by the option tables below. */
4155 #define OFFSET(x) offsetof(MpegEncContext, x)
/* Flag combination passed as the last field of every option entry in this file section.
 * NOTE(review): the expansion is not parenthesized; that is harmless where it is
 * used as a whole initializer field, but would bind unexpectedly inside a larger
 * expression. */
4156 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/*
 * Private options of the H.263 encoder.
 * Each entry gives an option name, its help text and, through OFFSET(), the
 * MpegEncContext field it is stored in; all are AV_OPT_TYPE_INT with the VE flags.
 * The remaining fields ({ 0 }, 0, 1 / INT_MAX) are presumably default, minimum and
 * maximum -- confirm against the AVOption definition.
 */
4157 static const AVOption h263_options[] = {
4158     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4159     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4160     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { 0 }, 0, INT_MAX, VE },
         /* macro defined outside this chunk; presumably expands to further option entries */
4161     FF_MPV_COMMON_OPTS
         /* final entry with a NULL name */
4162     { NULL },
4163 };
4164
/* AVClass that attaches h263_options to the H.263 encoder; referenced by
 * ff_h263_encoder.priv_class. */
4165 static const AVClass h263_class = {
4166     .class_name = "H.263 encoder",
4167     .item_name  = av_default_item_name,
4168     .option     = h263_options,
4169     .version    = LIBAVUTIL_VERSION_INT,
4170 };
4171
/*
 * Codec descriptor for the "h263" encoder (CODEC_ID_H263).
 * Uses MpegEncContext as private data and the ff_MPV_encode_init /
 * ff_MPV_encode_picture / ff_MPV_encode_end callbacks, like the other
 * encoders declared after it.
 */
4172 AVCodec ff_h263_encoder = {
4173     .name           = "h263",
4174     .type           = AVMEDIA_TYPE_VIDEO,
4175     .id             = CODEC_ID_H263,
4176     .priv_data_size = sizeof(MpegEncContext),
4177     .init           = ff_MPV_encode_init,
4178     .encode2        = ff_MPV_encode_picture,
4179     .close          = ff_MPV_encode_end,
         /* only PIX_FMT_YUV420P is listed; PIX_FMT_NONE ends the list */
4180     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4181     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4182     .priv_class     = &h263_class,
4183 };
4184
/*
 * Private options of the H.263+ encoder.
 * Every entry is an AV_OPT_TYPE_INT bound to a MpegEncContext field through
 * OFFSET(), with the VE flags; the "{ 0 }, 0, 1" fields are presumably
 * default 0 and range 0..1 -- confirm against the AVOption definition.
 */
4185 static const AVOption h263p_options[] = {
4186     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4187     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4188     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4189     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
         /* macro defined outside this chunk; presumably expands to further option entries */
4190     FF_MPV_COMMON_OPTS
         /* final entry with a NULL name */
4191     { NULL },
4192 };
/* AVClass that attaches h263p_options to the H.263+ encoder; referenced by
 * ff_h263p_encoder.priv_class. */
4193 static const AVClass h263p_class = {
4194     .class_name = "H.263p encoder",
4195     .item_name  = av_default_item_name,
4196     .option     = h263p_options,
4197     .version    = LIBAVUTIL_VERSION_INT,
4198 };
4199
/*
 * Codec descriptor for the "h263p" encoder (CODEC_ID_H263P).
 * Same private data type and init/encode2/close callbacks as ff_h263_encoder,
 * but it additionally sets CODEC_CAP_SLICE_THREADS and uses h263p_class for
 * its options.
 */
4200 AVCodec ff_h263p_encoder = {
4201     .name           = "h263p",
4202     .type           = AVMEDIA_TYPE_VIDEO,
4203     .id             = CODEC_ID_H263P,
4204     .priv_data_size = sizeof(MpegEncContext),
4205     .init           = ff_MPV_encode_init,
4206     .encode2        = ff_MPV_encode_picture,
4207     .close          = ff_MPV_encode_end,
4208     .capabilities   = CODEC_CAP_SLICE_THREADS,
         /* only PIX_FMT_YUV420P is listed; PIX_FMT_NONE ends the list */
4209     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4210     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4211     .priv_class     = &h263p_class,
4212 };
4213
/* FF_MPV_GENERIC_CLASS is defined outside this chunk; presumably it declares
 * the msmpeg4v2_class object referenced by .priv_class below -- confirm in
 * the header that defines the macro. */
4214 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4215
/* Codec descriptor for the "msmpeg4v2" encoder (CODEC_ID_MSMPEG4V2); shares the
 * MpegEncContext private data and ff_MPV_encode_* callbacks with the other
 * encoders in this file section. */
4216 AVCodec ff_msmpeg4v2_encoder = {
4217     .name           = "msmpeg4v2",
4218     .type           = AVMEDIA_TYPE_VIDEO,
4219     .id             = CODEC_ID_MSMPEG4V2,
4220     .priv_data_size = sizeof(MpegEncContext),
4221     .init           = ff_MPV_encode_init,
4222     .encode2        = ff_MPV_encode_picture,
4223     .close          = ff_MPV_encode_end,
         /* only PIX_FMT_YUV420P is listed; PIX_FMT_NONE ends the list */
4224     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4225     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4226     .priv_class     = &msmpeg4v2_class,
4227 };
4228
/* FF_MPV_GENERIC_CLASS is defined outside this chunk; presumably it declares
 * the msmpeg4v3_class object referenced by .priv_class below -- confirm in
 * the header that defines the macro. */
4229 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4230
/* Codec descriptor for CODEC_ID_MSMPEG4V3. Note the registered name is
 * "msmpeg4", without a version suffix, unlike the symbol name. */
4231 AVCodec ff_msmpeg4v3_encoder = {
4232     .name           = "msmpeg4",
4233     .type           = AVMEDIA_TYPE_VIDEO,
4234     .id             = CODEC_ID_MSMPEG4V3,
4235     .priv_data_size = sizeof(MpegEncContext),
4236     .init           = ff_MPV_encode_init,
4237     .encode2        = ff_MPV_encode_picture,
4238     .close          = ff_MPV_encode_end,
         /* only PIX_FMT_YUV420P is listed; PIX_FMT_NONE ends the list */
4239     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4240     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4241     .priv_class     = &msmpeg4v3_class,
4242 };
4243
/* FF_MPV_GENERIC_CLASS is defined outside this chunk; presumably it declares
 * the wmv1_class object referenced by .priv_class below -- confirm in the
 * header that defines the macro. */
4244 FF_MPV_GENERIC_CLASS(wmv1)
4245
/* Codec descriptor for the "wmv1" encoder (CODEC_ID_WMV1, long name
 * "Windows Media Video 7"); shares the MpegEncContext private data and
 * ff_MPV_encode_* callbacks with the other encoders in this file section. */
4246 AVCodec ff_wmv1_encoder = {
4247     .name           = "wmv1",
4248     .type           = AVMEDIA_TYPE_VIDEO,
4249     .id             = CODEC_ID_WMV1,
4250     .priv_data_size = sizeof(MpegEncContext),
4251     .init           = ff_MPV_encode_init,
4252     .encode2        = ff_MPV_encode_picture,
4253     .close          = ff_MPV_encode_end,
         /* only PIX_FMT_YUV420P is listed; PIX_FMT_NONE ends the list */
4254     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4255     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4256     .priv_class     = &wmv1_class,
4257 };